[apple/xnu.git] / bsd / net / dlil.c (xnu-4903.270.47)
1 /*
2 * Copyright (c) 1999-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/domain.h>
43 #include <sys/user.h>
44 #include <sys/random.h>
45 #include <sys/socketvar.h>
46 #include <net/if_dl.h>
47 #include <net/if.h>
48 #include <net/route.h>
49 #include <net/if_var.h>
50 #include <net/dlil.h>
51 #include <net/if_arp.h>
52 #include <net/iptap.h>
53 #include <net/pktap.h>
54 #include <sys/kern_event.h>
55 #include <sys/kdebug.h>
56 #include <sys/mcache.h>
57 #include <sys/syslog.h>
58 #include <sys/protosw.h>
59 #include <sys/priv.h>
60
61 #include <kern/assert.h>
62 #include <kern/task.h>
63 #include <kern/thread.h>
64 #include <kern/sched_prim.h>
65 #include <kern/locks.h>
66 #include <kern/zalloc.h>
67
68 #include <net/kpi_protocol.h>
69 #include <net/if_types.h>
70 #include <net/if_ipsec.h>
71 #include <net/if_llreach.h>
72 #include <net/if_utun.h>
73 #include <net/kpi_interfacefilter.h>
74 #include <net/classq/classq.h>
75 #include <net/classq/classq_sfb.h>
76 #include <net/flowhash.h>
77 #include <net/ntstat.h>
78 #include <net/if_llatbl.h>
79 #include <net/net_api_stats.h>
80 #include <net/if_ports_used.h>
81 #include <netinet/in.h>
82 #if INET
83 #include <netinet/in_var.h>
84 #include <netinet/igmp_var.h>
85 #include <netinet/ip_var.h>
86 #include <netinet/tcp.h>
87 #include <netinet/tcp_var.h>
88 #include <netinet/udp.h>
89 #include <netinet/udp_var.h>
90 #include <netinet/if_ether.h>
91 #include <netinet/in_pcb.h>
92 #include <netinet/in_tclass.h>
93 #include <netinet/ip.h>
94 #include <netinet/ip_icmp.h>
95 #include <netinet/icmp_var.h>
96 #endif /* INET */
97
98 #if INET6
99 #include <net/nat464_utils.h>
100 #include <netinet6/in6_var.h>
101 #include <netinet6/nd6.h>
102 #include <netinet6/mld6_var.h>
103 #include <netinet6/scope6_var.h>
104 #include <netinet/ip6.h>
105 #include <netinet/icmp6.h>
106 #endif /* INET6 */
107 #include <net/pf_pbuf.h>
108 #include <libkern/OSAtomic.h>
109 #include <libkern/tree.h>
110
111 #include <dev/random/randomdev.h>
112 #include <machine/machine_routines.h>
113
114 #include <mach/thread_act.h>
115 #include <mach/sdt.h>
116
117 #if CONFIG_MACF
118 #include <sys/kauth.h>
119 #include <security/mac_framework.h>
120 #include <net/ethernet.h>
121 #include <net/firewire.h>
122 #endif
123
124 #if PF
125 #include <net/pfvar.h>
126 #endif /* PF */
127 #include <net/pktsched/pktsched.h>
128
129 #if NECP
130 #include <net/necp.h>
131 #endif /* NECP */
132
133
134 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
135 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
136 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
137 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
138 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
139
140 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
141 #define MAX_LINKADDR 4 /* LONGWORDS */
142 #define M_NKE M_IFADDR
143
144 #if 1
145 #define DLIL_PRINTF printf
146 #else
147 #define DLIL_PRINTF kprintf
148 #endif
149
150 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
151 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
152
153 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
154 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
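
/*
 * Editor's note (illustrative, not part of the original source): each of the
 * two macros above expands to a compile-time assertion.  For example,
 * IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes) asserts that
 * offsetof(struct if_data_internal, ifi_ibytes) is a multiple of
 * sizeof (u_int64_t), i.e. that the counter is 64-bit aligned and therefore
 * safe for the atomic updates the DLIL performs on it (see the list of
 * required fields in dlil_init() below).
 */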
155
156 enum {
157 kProtoKPI_v1 = 1,
158 kProtoKPI_v2 = 2
159 };
160
161 /*
162 * List of if_proto structures in if_proto_hash[] is protected by
163 * the ifnet lock. The rest of the fields are initialized at protocol
164 * attach time and never change, thus no lock required as long as
165 * a reference to it is valid, via if_proto_ref().
166 */
167 struct if_proto {
168 SLIST_ENTRY(if_proto) next_hash;
169 u_int32_t refcount;
170 u_int32_t detached;
171 struct ifnet *ifp;
172 protocol_family_t protocol_family;
173 int proto_kpi;
174 union {
175 struct {
176 proto_media_input input;
177 proto_media_preout pre_output;
178 proto_media_event event;
179 proto_media_ioctl ioctl;
180 proto_media_detached detached;
181 proto_media_resolve_multi resolve_multi;
182 proto_media_send_arp send_arp;
183 } v1;
184 struct {
185 proto_media_input_v2 input;
186 proto_media_preout pre_output;
187 proto_media_event event;
188 proto_media_ioctl ioctl;
189 proto_media_detached detached;
190 proto_media_resolve_multi resolve_multi;
191 proto_media_send_arp send_arp;
192 } v2;
193 } kpi;
194 };
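
/*
 * Editor's note (illustrative, not part of the original source): the v1 and
 * v2 members of the kpi union differ only in the input callback.  Compare the
 * ifproto_media_input_v1()/ifproto_media_input_v2() prototypes further down:
 * the v1 handler is passed the frame header pointer along with the packet,
 * while the v2 handler is not.  Which member is valid is recorded in
 * proto_kpi (kProtoKPI_v1 or kProtoKPI_v2), as if_proto_free() demonstrates.
 */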
195
196 SLIST_HEAD(proto_hash_entry, if_proto);
197
198 #define DLIL_SDLDATALEN \
199 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
200
201 struct dlil_ifnet {
202 struct ifnet dl_if; /* public ifnet */
203 /*
204 * DLIL private fields, protected by dl_if_lock
205 */
206 decl_lck_mtx_data(, dl_if_lock);
207 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
208 u_int32_t dl_if_flags; /* flags (below) */
209 u_int32_t dl_if_refcnt; /* refcnt */
210 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
211 void *dl_if_uniqueid; /* unique interface id */
212 size_t dl_if_uniqueid_len; /* length of the unique id */
213 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
214 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
215 struct {
216 struct ifaddr ifa; /* lladdr ifa */
217 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
218 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
219 } dl_if_lladdr;
220 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
221 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
222 ctrace_t dl_if_attach; /* attach PC stacktrace */
223 ctrace_t dl_if_detach; /* detach PC stacktrace */
224 };
225
226 /* Values for dl_if_flags (private to DLIL) */
227 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
228 #define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */
229 #define DLIF_DEBUG 0x4 /* has debugging info */
230
231 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
232
233 /* For gdb */
234 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
235
236 struct dlil_ifnet_dbg {
237 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
238 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
239 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
240 /*
241 * Circular lists of ifnet_{reference,release} callers.
242 */
243 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
244 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
245 };
246
247 #define DLIL_TO_IFP(s) (&s->dl_if)
248 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
249
250 struct ifnet_filter {
251 TAILQ_ENTRY(ifnet_filter) filt_next;
252 u_int32_t filt_skip;
253 u_int32_t filt_flags;
254 ifnet_t filt_ifp;
255 const char *filt_name;
256 void *filt_cookie;
257 protocol_family_t filt_protocol;
258 iff_input_func filt_input;
259 iff_output_func filt_output;
260 iff_event_func filt_event;
261 iff_ioctl_func filt_ioctl;
262 iff_detached_func filt_detached;
263 };
264
265 struct proto_input_entry;
266
267 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
268 static lck_grp_t *dlil_lock_group;
269 lck_grp_t *ifnet_lock_group;
270 static lck_grp_t *ifnet_head_lock_group;
271 static lck_grp_t *ifnet_snd_lock_group;
272 static lck_grp_t *ifnet_rcv_lock_group;
273 lck_attr_t *ifnet_lock_attr;
274 decl_lck_rw_data(static, ifnet_head_lock);
275 decl_lck_mtx_data(static, dlil_ifnet_lock);
276 u_int32_t dlil_filter_disable_tso_count = 0;
277
278 #if DEBUG
279 static unsigned int ifnet_debug = 1; /* debugging (enabled) */
280 #else
281 static unsigned int ifnet_debug; /* debugging (disabled) */
282 #endif /* !DEBUG */
283 static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
284 static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
285 static struct zone *dlif_zone; /* zone for dlil_ifnet */
286
287 #define DLIF_ZONE_MAX IFNETS_MAX /* maximum elements in zone */
288 #define DLIF_ZONE_NAME "ifnet" /* zone name */
289
290 static unsigned int dlif_filt_size; /* size of ifnet_filter */
291 static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
292
293 #define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
294 #define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
295
296 static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
297 static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
298
299 #define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
300 #define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
301
302 static unsigned int dlif_proto_size; /* size of if_proto */
303 static struct zone *dlif_proto_zone; /* zone for if_proto */
304
305 #define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
306 #define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
307
308 static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
309 static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
310 static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
311
312 #define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
313 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
314
315 static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
316 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
317 static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
318
319 #define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
320 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
321
322 static u_int32_t net_rtref;
323
324 static struct dlil_main_threading_info dlil_main_input_thread_info;
325 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
326 (struct dlil_threading_info *)&dlil_main_input_thread_info;
327
328 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
329 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
330 static void dlil_if_trace(struct dlil_ifnet *, int);
331 static void if_proto_ref(struct if_proto *);
332 static void if_proto_free(struct if_proto *);
333 static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
334 static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
335 u_int32_t list_count);
336 static void if_flt_monitor_busy(struct ifnet *);
337 static void if_flt_monitor_unbusy(struct ifnet *);
338 static void if_flt_monitor_enter(struct ifnet *);
339 static void if_flt_monitor_leave(struct ifnet *);
340 static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
341 char **, protocol_family_t);
342 static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
343 protocol_family_t);
344 static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
345 const struct sockaddr_dl *);
346 static int ifnet_lookup(struct ifnet *);
347 static void if_purgeaddrs(struct ifnet *);
348
349 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
350 struct mbuf *, char *);
351 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
352 struct mbuf *);
353 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
354 mbuf_t *, const struct sockaddr *, void *, char *, char *);
355 static void ifproto_media_event(struct ifnet *, protocol_family_t,
356 const struct kev_msg *);
357 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
358 unsigned long, void *);
359 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
360 struct sockaddr_dl *, size_t);
361 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
362 const struct sockaddr_dl *, const struct sockaddr *,
363 const struct sockaddr_dl *, const struct sockaddr *);
364
365 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
366 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
367 boolean_t poll, struct thread *tp);
368 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
369 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
370 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
371 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
372 protocol_family_t *);
373 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
374 const struct ifnet_demux_desc *, u_int32_t);
375 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
376 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
377 #if CONFIG_EMBEDDED
378 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
379 const struct sockaddr *, const char *, const char *,
380 u_int32_t *, u_int32_t *);
381 #else
382 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
383 const struct sockaddr *, const char *, const char *);
384 #endif /* CONFIG_EMBEDDED */
385 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
386 const struct sockaddr *, const char *, const char *,
387 u_int32_t *, u_int32_t *);
388 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
389 static void ifp_if_free(struct ifnet *);
390 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
391 static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
392 static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
393
394 static void dlil_main_input_thread_func(void *, wait_result_t);
395 static void dlil_input_thread_func(void *, wait_result_t);
396 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
397 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
398 static void dlil_terminate_input_thread(struct dlil_threading_info *);
399 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
400 struct dlil_threading_info *, boolean_t);
401 static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
402 static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
403 u_int32_t, ifnet_model_t, boolean_t);
404 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
405 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
406 static int dlil_is_clat_needed(protocol_family_t, mbuf_t );
407 static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
408 static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
409 #if DEBUG || DEVELOPMENT
410 static void dlil_verify_sum16(void);
411 #endif /* DEBUG || DEVELOPMENT */
412 static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
413 protocol_family_t);
414 static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
415 protocol_family_t);
416
417 static void ifnet_detacher_thread_func(void *, wait_result_t);
418 static int ifnet_detacher_thread_cont(int);
419 static void ifnet_detach_final(struct ifnet *);
420 static void ifnet_detaching_enqueue(struct ifnet *);
421 static struct ifnet *ifnet_detaching_dequeue(void);
422
423 static void ifnet_start_thread_fn(void *, wait_result_t);
424 static void ifnet_poll_thread_fn(void *, wait_result_t);
425 static void ifnet_poll(struct ifnet *);
426 static errno_t ifnet_enqueue_common(struct ifnet *, void *,
427 classq_pkt_type_t, boolean_t, boolean_t *);
428
429 static void ifp_src_route_copyout(struct ifnet *, struct route *);
430 static void ifp_src_route_copyin(struct ifnet *, struct route *);
431 #if INET6
432 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
433 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
434 #endif /* INET6 */
435
436 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
437 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
438 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
439 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
440 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
441 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
442 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
443 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
444 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
445 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
446 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
447
448 struct chain_len_stats tx_chain_len_stats;
449 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
450
451 #if TEST_INPUT_THREAD_TERMINATION
452 static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
453 #endif /* TEST_INPUT_THREAD_TERMINATION */
454
455 /* The following are protected by dlil_ifnet_lock */
456 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
457 static u_int32_t ifnet_detaching_cnt;
458 static void *ifnet_delayed_run; /* wait channel for detaching thread */
459
460 decl_lck_mtx_data(static, ifnet_fc_lock);
461
462 static uint32_t ifnet_flowhash_seed;
463
464 struct ifnet_flowhash_key {
465 char ifk_name[IFNAMSIZ];
466 uint32_t ifk_unit;
467 uint32_t ifk_flags;
468 uint32_t ifk_eflags;
469 uint32_t ifk_capabilities;
470 uint32_t ifk_capenable;
471 uint32_t ifk_output_sched_model;
472 uint32_t ifk_rand1;
473 uint32_t ifk_rand2;
474 };
475
476 /* Flow control entry per interface */
477 struct ifnet_fc_entry {
478 RB_ENTRY(ifnet_fc_entry) ifce_entry;
479 u_int32_t ifce_flowhash;
480 struct ifnet *ifce_ifp;
481 };
482
483 static uint32_t ifnet_calc_flowhash(struct ifnet *);
484 static int ifce_cmp(const struct ifnet_fc_entry *,
485 const struct ifnet_fc_entry *);
486 static int ifnet_fc_add(struct ifnet *);
487 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
488 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
489
490 /* protected by ifnet_fc_lock */
491 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
492 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
493 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
494
495 static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
496 static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */
497
498 #define IFNET_FC_ZONE_NAME "ifnet_fc_zone"
499 #define IFNET_FC_ZONE_MAX 32
500
501 extern void bpfdetach(struct ifnet *);
502 extern void proto_input_run(void);
503
504 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
505 u_int32_t flags);
506 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
507 u_int32_t flags);
508
509 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
510
511 #if CONFIG_MACF
512 #ifdef CONFIG_EMBEDDED
513 int dlil_lladdr_ckreq = 1;
514 #else
515 int dlil_lladdr_ckreq = 0;
516 #endif
517 #endif
518
519 #if DEBUG
520 int dlil_verbose = 1;
521 #else
522 int dlil_verbose = 0;
523 #endif /* DEBUG */
524 #if IFNET_INPUT_SANITY_CHK
525 /* sanity checking of input packet lists received */
526 static u_int32_t dlil_input_sanity_check = 0;
527 #endif /* IFNET_INPUT_SANITY_CHK */
528 /* rate limit debug messages */
529 struct timespec dlil_dbgrate = { 1, 0 };
530
531 SYSCTL_DECL(_net_link_generic_system);
532
533 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
534 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
535
536 #define IF_SNDQ_MINLEN 32
537 u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
538 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
539 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
540 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
541
542 #define IF_RCVQ_MINLEN 32
543 #define IF_RCVQ_MAXLEN 256
544 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
545 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
546 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
547 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
548
549 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
550 static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
551 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
552 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
553 "ilog2 of EWMA decay rate of avg inbound packets");
554
555 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
556 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
557 static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
558 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
559 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
560 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
561 "Q", "input poll mode freeze time");
562
563 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
564 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
565 static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
566 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
567 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
568 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
569 "Q", "input poll sampling time");
570
571 #define IF_RXPOLL_INTERVALTIME_MIN (1ULL * 1000) /* 1 us */
572 #define IF_RXPOLL_INTERVALTIME (1ULL * 1000 * 1000) /* 1 ms */
573 static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
574 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
575 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
576 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
577 "Q", "input poll interval (time)");
578
579 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
580 static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
581 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
582 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
583 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
584
585 #define IF_RXPOLL_WLOWAT 10
586 static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
587 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
588 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
589 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
590 "I", "input poll wakeup low watermark");
591
592 #define IF_RXPOLL_WHIWAT 100
593 static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
594 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
595 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
596 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
597 "I", "input poll wakeup high watermark");
598
599 static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
600 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
601 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
602 "max packets per poll call");
603
604 static u_int32_t if_rxpoll = 1;
605 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
606 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
607 sysctl_rxpoll, "I", "enable opportunistic input polling");
608
609 #if TEST_INPUT_THREAD_TERMINATION
610 static u_int32_t if_input_thread_termination_spin = 0;
611 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
612 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
613 &if_input_thread_termination_spin, 0,
614 sysctl_input_thread_termination_spin,
615 "I", "input thread termination spin limit");
616 #endif /* TEST_INPUT_THREAD_TERMINATION */
617
618 static u_int32_t cur_dlil_input_threads = 0;
619 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
620 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
621 "Current number of DLIL input threads");
622
623 #if IFNET_INPUT_SANITY_CHK
624 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
625 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
626 "Turn on sanity checking in DLIL input");
627 #endif /* IFNET_INPUT_SANITY_CHK */
628
629 static u_int32_t if_flowadv = 1;
630 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
631 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
632 "enable flow-advisory mechanism");
633
634 static u_int32_t if_delaybased_queue = 1;
635 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
636 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
637 "enable delay based dynamic queue sizing");
638
639 static uint64_t hwcksum_in_invalidated = 0;
640 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
641 hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
642 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
643
644 uint32_t hwcksum_dbg = 0;
645 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
646 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
647 "enable hardware cksum debugging");
648
649 u_int32_t ifnet_start_delayed = 0;
650 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
651 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
652 "number of times start was delayed");
653
654 u_int32_t ifnet_delay_start_disabled = 0;
655 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
656 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
657 "number of times start was delayed");
658
659 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
660 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
661 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
662 #define HWCKSUM_DBG_MASK \
663 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
664 HWCKSUM_DBG_FINALIZE_FORCED)
665
666 static uint32_t hwcksum_dbg_mode = 0;
667 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
668 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
669 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
670
671 static uint64_t hwcksum_dbg_partial_forced = 0;
672 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
673 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
674 &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
675
676 static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
677 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
678 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
679 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
680
681 static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
682 SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
683 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
684 &hwcksum_dbg_partial_rxoff_forced, 0,
685 sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
686 "forced partial cksum rx offset");
687
688 static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
689 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
690 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
691 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
692 "adjusted partial cksum rx offset");
693
694 static uint64_t hwcksum_dbg_verified = 0;
695 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
696 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
697 &hwcksum_dbg_verified, "packets verified for having good checksum");
698
699 static uint64_t hwcksum_dbg_bad_cksum = 0;
700 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
701 hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
702 &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
703
704 static uint64_t hwcksum_dbg_bad_rxoff = 0;
705 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
706 hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
707 &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
708
709 static uint64_t hwcksum_dbg_adjusted = 0;
710 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
711 hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
712 &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
713
714 static uint64_t hwcksum_dbg_finalized_hdr = 0;
715 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
716 hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
717 &hwcksum_dbg_finalized_hdr, "finalized headers");
718
719 static uint64_t hwcksum_dbg_finalized_data = 0;
720 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
721 hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
722 &hwcksum_dbg_finalized_data, "finalized payloads");
723
724 uint32_t hwcksum_tx = 1;
725 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
726 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
727 "enable transmit hardware checksum offload");
728
729 uint32_t hwcksum_rx = 1;
730 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
731 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
732 "enable receive hardware checksum offload");
733
734 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
735 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
736 sysctl_tx_chain_len_stats, "S", "");
737
738 uint32_t tx_chain_len_count = 0;
739 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
740 CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
741
742 static uint32_t threshold_notify = 1; /* enable/disable */
743 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
744 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");
745
746 static uint32_t threshold_interval = 2; /* in seconds */
747 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
748 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
749
750 #if (DEVELOPMENT || DEBUG)
751 static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
752 SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
753 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
754 #endif /* DEVELOPMENT || DEBUG */
755
756 struct net_api_stats net_api_stats;
757 SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
758 &net_api_stats, net_api_stats, "");
759
760
761 unsigned int net_rxpoll = 1;
762 unsigned int net_affinity = 1;
763 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
764
765 extern u_int32_t inject_buckets;
766
767 static lck_grp_attr_t *dlil_grp_attributes = NULL;
768 static lck_attr_t *dlil_lck_attributes = NULL;
769
770 /* DLIL data threshold thread call */
771 static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
772
773 static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t);
774
775 uint32_t dlil_rcv_mit_pkts_min = 5;
776 uint32_t dlil_rcv_mit_pkts_max = 64;
777 uint32_t dlil_rcv_mit_interval = (500 * 1000);
778
779 #if (DEVELOPMENT || DEBUG)
780 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min,
781 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, "");
782 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max,
783 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, "");
784 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval,
785 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, "");
786 #endif /* DEVELOPMENT || DEBUG */
787
788
789 #define DLIL_INPUT_CHECK(m, ifp) { \
790 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
791 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
792 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
793 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
794 /* NOTREACHED */ \
795 } \
796 }
797
798 #define DLIL_EWMA(old, new, decay) do { \
799 u_int32_t _avg; \
800 if ((_avg = (old)) > 0) \
801 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
802 else \
803 _avg = (new); \
804 (old) = _avg; \
805 } while (0)
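
/*
 * Editor's note (worked example, not part of the original source): with
 * decay = 2 (the IF_RXPOLL_DECAY default, i.e. an EWMA decay rate of 4),
 * DLIL_EWMA(avg, new, 2) computes
 *
 *	avg = ((avg << 2) - avg + new) >> 2  =  (3*avg + new) / 4
 *
 * so an existing average of 100 and a new sample of 20 yield
 * (300 + 20) / 4 = 80; a zero average is simply seeded with the new sample.
 */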
806
807 #define MBPS (1ULL * 1000 * 1000)
808 #define GBPS (MBPS * 1000)
809
810 struct rxpoll_time_tbl {
811 u_int64_t speed; /* downlink speed */
812 u_int32_t plowat; /* packets low watermark */
813 u_int32_t phiwat; /* packets high watermark */
814 u_int32_t blowat; /* bytes low watermark */
815 u_int32_t bhiwat; /* bytes high watermark */
816 };
817
818 static struct rxpoll_time_tbl rxpoll_tbl[] = {
819 { 10 * MBPS, 2, 8, (1 * 1024), (6 * 1024) },
820 { 100 * MBPS, 10, 40, (4 * 1024), (64 * 1024) },
821 { 1 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
822 { 10 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
823 { 100 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
824 { 0, 0, 0, 0, 0 }
825 };
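
/*
 * Editor's note (illustrative, not part of the original source): rows are
 * keyed by downlink speed, so e.g. a link reported at 1 Gbps uses the
 * { 10, 40 } packet and { 4 KB, 64 KB } byte watermarks; the all-zero row
 * presumably acts as a terminating sentinel for table lookups.
 */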
826
827 int
828 proto_hash_value(u_int32_t protocol_family)
829 {
830 /*
831 * dlil_proto_unplumb_all() depends on the mapping between
832 * the hash bucket index and the protocol family defined
833 * here; future changes must be applied there as well.
834 */
835 switch (protocol_family) {
836 case PF_INET:
837 return 0;
838 case PF_INET6:
839 return 1;
840 case PF_VLAN:
841 return 2;
842 case PF_UNSPEC:
843 default:
844 return 3;
845 }
846 }
847
848 /*
849 * Caller must already be holding ifnet lock.
850 */
851 static struct if_proto *
852 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
853 {
854 struct if_proto *proto = NULL;
855 u_int32_t i = proto_hash_value(protocol_family);
856
857 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
858
859 if (ifp->if_proto_hash != NULL) {
860 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
861 }
862
863 while (proto != NULL && proto->protocol_family != protocol_family) {
864 proto = SLIST_NEXT(proto, next_hash);
865 }
866
867 if (proto != NULL) {
868 if_proto_ref(proto);
869 }
870
871 return proto;
872 }
873
874 static void
875 if_proto_ref(struct if_proto *proto)
876 {
877 atomic_add_32(&proto->refcount, 1);
878 }
879
880 extern void if_rtproto_del(struct ifnet *ifp, int protocol);
881
882 static void
883 if_proto_free(struct if_proto *proto)
884 {
885 u_int32_t oldval;
886 struct ifnet *ifp = proto->ifp;
887 u_int32_t proto_family = proto->protocol_family;
888 struct kev_dl_proto_data ev_pr_data;
889
890 oldval = atomic_add_32_ov(&proto->refcount, -1);
891 if (oldval > 1) {
892 return;
893 }
894
895 /* No more reference on this, protocol must have been detached */
896 VERIFY(proto->detached);
897
898 if (proto->proto_kpi == kProtoKPI_v1) {
899 if (proto->kpi.v1.detached) {
900 proto->kpi.v1.detached(ifp, proto->protocol_family);
901 }
902 }
903 if (proto->proto_kpi == kProtoKPI_v2) {
904 if (proto->kpi.v2.detached) {
905 proto->kpi.v2.detached(ifp, proto->protocol_family);
906 }
907 }
908
909 /*
910 * Cleanup routes that may still be in the routing table for that
911 * interface/protocol pair.
912 */
913 if_rtproto_del(ifp, proto_family);
914
915 /*
916  	 * The reserved field carries the number of protocols still attached
917 * (subject to change)
918 */
919 ifnet_lock_shared(ifp);
920 ev_pr_data.proto_family = proto_family;
921 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
922 ifnet_lock_done(ifp);
923
924 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
925 (struct net_event_data *)&ev_pr_data,
926 sizeof(struct kev_dl_proto_data));
927
928 if (ev_pr_data.proto_remaining_count == 0) {
929 /*
930 * The protocol count has gone to zero, mark the interface down.
931 * This used to be done by configd.KernelEventMonitor, but that
932 * is inherently prone to races (rdar://problem/30810208).
933 */
934 (void) ifnet_set_flags(ifp, 0, IFF_UP);
935 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
936 dlil_post_sifflags_msg(ifp);
937 }
938
939 zfree(dlif_proto_zone, proto);
940 }
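
/*
 * Editor's note (illustrative sketch, not part of the original source): a
 * typical lookup follows the locking rule stated above find_attached_proto()
 * and the comment above struct if_proto -- the hash list is walked under the
 * ifnet lock, but the reference taken by find_attached_proto() keeps the
 * entry valid after the lock is dropped:
 *
 *	ifnet_lock_shared(ifp);
 *	proto = find_attached_proto(ifp, PF_INET);
 *	ifnet_lock_done(ifp);
 *	if (proto != NULL) {
 *		... use the proto->kpi.v1 / v2 callbacks ...
 *		if_proto_free(proto);	// drop the reference from the lookup
 *	}
 */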
941
942 __private_extern__ void
943 ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
944 {
945 #if !MACH_ASSERT
946 #pragma unused(ifp)
947 #endif
948 unsigned int type = 0;
949 int ass = 1;
950
951 switch (what) {
952 case IFNET_LCK_ASSERT_EXCLUSIVE:
953 type = LCK_RW_ASSERT_EXCLUSIVE;
954 break;
955
956 case IFNET_LCK_ASSERT_SHARED:
957 type = LCK_RW_ASSERT_SHARED;
958 break;
959
960 case IFNET_LCK_ASSERT_OWNED:
961 type = LCK_RW_ASSERT_HELD;
962 break;
963
964 case IFNET_LCK_ASSERT_NOTOWNED:
965 /* nothing to do here for RW lock; bypass assert */
966 ass = 0;
967 break;
968
969 default:
970 panic("bad ifnet assert type: %d", what);
971 /* NOTREACHED */
972 }
973 if (ass) {
974 LCK_RW_ASSERT(&ifp->if_lock, type);
975 }
976 }
977
978 __private_extern__ void
979 ifnet_lock_shared(struct ifnet *ifp)
980 {
981 lck_rw_lock_shared(&ifp->if_lock);
982 }
983
984 __private_extern__ void
985 ifnet_lock_exclusive(struct ifnet *ifp)
986 {
987 lck_rw_lock_exclusive(&ifp->if_lock);
988 }
989
990 __private_extern__ void
991 ifnet_lock_done(struct ifnet *ifp)
992 {
993 lck_rw_done(&ifp->if_lock);
994 }
995
996 #if INET
997 __private_extern__ void
998 if_inetdata_lock_shared(struct ifnet *ifp)
999 {
1000 lck_rw_lock_shared(&ifp->if_inetdata_lock);
1001 }
1002
1003 __private_extern__ void
1004 if_inetdata_lock_exclusive(struct ifnet *ifp)
1005 {
1006 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
1007 }
1008
1009 __private_extern__ void
1010 if_inetdata_lock_done(struct ifnet *ifp)
1011 {
1012 lck_rw_done(&ifp->if_inetdata_lock);
1013 }
1014 #endif
1015
1016 #if INET6
1017 __private_extern__ void
1018 if_inet6data_lock_shared(struct ifnet *ifp)
1019 {
1020 lck_rw_lock_shared(&ifp->if_inet6data_lock);
1021 }
1022
1023 __private_extern__ void
1024 if_inet6data_lock_exclusive(struct ifnet *ifp)
1025 {
1026 lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
1027 }
1028
1029 __private_extern__ void
1030 if_inet6data_lock_done(struct ifnet *ifp)
1031 {
1032 lck_rw_done(&ifp->if_inet6data_lock);
1033 }
1034 #endif
1035
1036 __private_extern__ void
1037 ifnet_head_lock_shared(void)
1038 {
1039 lck_rw_lock_shared(&ifnet_head_lock);
1040 }
1041
1042 __private_extern__ void
1043 ifnet_head_lock_exclusive(void)
1044 {
1045 lck_rw_lock_exclusive(&ifnet_head_lock);
1046 }
1047
1048 __private_extern__ void
1049 ifnet_head_done(void)
1050 {
1051 lck_rw_done(&ifnet_head_lock);
1052 }
1053
1054 __private_extern__ void
1055 ifnet_head_assert_exclusive(void)
1056 {
1057 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
1058 }
1059
1060 /*
1061 * dlil_ifp_protolist
1062 * - get the list of protocols attached to the interface, or just the number
1063 * of attached protocols
1064 * - if the number returned is greater than 'list_count', truncation occurred
1065 *
1066 * Note:
1067 * - caller must already be holding ifnet lock.
1068 */
1069 static u_int32_t
1070 dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
1071 u_int32_t list_count)
1072 {
1073 u_int32_t count = 0;
1074 int i;
1075
1076 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1077
1078 if (ifp->if_proto_hash == NULL) {
1079 goto done;
1080 }
1081
1082 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1083 struct if_proto *proto;
1084 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1085 if (list != NULL && count < list_count) {
1086 list[count] = proto->protocol_family;
1087 }
1088 count++;
1089 }
1090 }
1091 done:
1092 return count;
1093 }
1094
1095 __private_extern__ u_int32_t
1096 if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
1097 {
1098 ifnet_lock_shared(ifp);
1099 count = dlil_ifp_protolist(ifp, protolist, count);
1100 ifnet_lock_done(ifp);
1101 return count;
1102 }
1103
1104 __private_extern__ void
1105 if_free_protolist(u_int32_t *list)
1106 {
1107 _FREE(list, M_TEMP);
1108 }
1109
1110 __private_extern__ void
1111 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1112 u_int32_t event_code, struct net_event_data *event_data,
1113 u_int32_t event_data_len)
1114 {
1115 struct net_event_data ev_data;
1116 struct kev_msg ev_msg;
1117
1118 bzero(&ev_msg, sizeof(ev_msg));
1119 bzero(&ev_data, sizeof(ev_data));
1120 /*
1121 * a net event always starts with a net_event_data structure
1122 * but the caller can generate a simple net event or
1123 * provide a longer event structure to post
1124 */
1125 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1126 ev_msg.kev_class = KEV_NETWORK_CLASS;
1127 ev_msg.kev_subclass = event_subclass;
1128 ev_msg.event_code = event_code;
1129
1130 if (event_data == NULL) {
1131 event_data = &ev_data;
1132 event_data_len = sizeof(struct net_event_data);
1133 }
1134
1135 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1136 event_data->if_family = ifp->if_family;
1137 event_data->if_unit = (u_int32_t)ifp->if_unit;
1138
1139 ev_msg.dv[0].data_length = event_data_len;
1140 ev_msg.dv[0].data_ptr = event_data;
1141 ev_msg.dv[1].data_length = 0;
1142
1143 	/* Don't update interface generation for quality and RRC state changes */
1144 bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
1145 (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
1146 event_code != KEV_DL_RRC_STATE_CHANGED));
1147
1148 dlil_event_internal(ifp, &ev_msg, update_generation);
1149 }
1150
1151 __private_extern__ int
1152 dlil_alloc_local_stats(struct ifnet *ifp)
1153 {
1154 int ret = EINVAL;
1155 void *buf, *base, **pbuf;
1156
1157 if (ifp == NULL) {
1158 goto end;
1159 }
1160
1161 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1162 /* allocate tcpstat_local structure */
1163 buf = zalloc(dlif_tcpstat_zone);
1164 if (buf == NULL) {
1165 ret = ENOMEM;
1166 goto end;
1167 }
1168 bzero(buf, dlif_tcpstat_bufsize);
1169
1170 /* Get the 64-bit aligned base address for this object */
1171 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
1172 sizeof(u_int64_t));
1173 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1174 ((intptr_t)buf + dlif_tcpstat_bufsize));
1175
1176 /*
1177 * Wind back a pointer size from the aligned base and
1178 * save the original address so we can free it later.
1179 */
1180 pbuf = (void **)((intptr_t)base - sizeof(void *));
1181 *pbuf = buf;
1182 ifp->if_tcp_stat = base;
1183
1184 /* allocate udpstat_local structure */
1185 buf = zalloc(dlif_udpstat_zone);
1186 if (buf == NULL) {
1187 ret = ENOMEM;
1188 goto end;
1189 }
1190 bzero(buf, dlif_udpstat_bufsize);
1191
1192 /* Get the 64-bit aligned base address for this object */
1193 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
1194 sizeof(u_int64_t));
1195 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1196 ((intptr_t)buf + dlif_udpstat_bufsize));
1197
1198 /*
1199 * Wind back a pointer size from the aligned base and
1200 * save the original address so we can free it later.
1201 */
1202 pbuf = (void **)((intptr_t)base - sizeof(void *));
1203 *pbuf = buf;
1204 ifp->if_udp_stat = base;
1205
1206 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
1207 IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));
1208
1209 ret = 0;
1210 }
1211
1212 if (ifp->if_ipv4_stat == NULL) {
1213 MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
1214 sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
1215 if (ifp->if_ipv4_stat == NULL) {
1216 ret = ENOMEM;
1217 goto end;
1218 }
1219 }
1220
1221 if (ifp->if_ipv6_stat == NULL) {
1222 MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
1223 sizeof(struct if_tcp_ecn_stat), M_TEMP, M_WAITOK | M_ZERO);
1224 if (ifp->if_ipv6_stat == NULL) {
1225 ret = ENOMEM;
1226 goto end;
1227 }
1228 }
1229 end:
1230 if (ret != 0) {
1231 if (ifp->if_tcp_stat != NULL) {
1232 pbuf = (void **)
1233 ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
1234 zfree(dlif_tcpstat_zone, *pbuf);
1235 ifp->if_tcp_stat = NULL;
1236 }
1237 if (ifp->if_udp_stat != NULL) {
1238 pbuf = (void **)
1239 ((intptr_t)ifp->if_udp_stat - sizeof(void *));
1240 zfree(dlif_udpstat_zone, *pbuf);
1241 ifp->if_udp_stat = NULL;
1242 }
1243 if (ifp->if_ipv4_stat != NULL) {
1244 FREE(ifp->if_ipv4_stat, M_TEMP);
1245 ifp->if_ipv4_stat = NULL;
1246 }
1247 if (ifp->if_ipv6_stat != NULL) {
1248 FREE(ifp->if_ipv6_stat, M_TEMP);
1249 ifp->if_ipv6_stat = NULL;
1250 }
1251 }
1252
1253 return ret;
1254 }
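
/*
 * Editor's note (illustrative, not part of the original source): the
 * allocation scheme above (and the matching dlif_*_bufsize computation in
 * dlil_init()) reserves sizeof (void *) + sizeof (u_int64_t) of headroom so
 * the object can be 64-bit aligned inside the zone element.  Sketch:
 *
 *	buf  = zalloc(zone);                                    // raw element
 *	base = P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
 *	    sizeof (u_int64_t));                                // aligned base
 *	((void **)base)[-1] = buf;                              // stash raw ptr
 *	ifp->if_tcp_stat = base;                                // aligned object
 *
 * The free path recovers the raw pointer from the slot just before the
 * aligned base, as the error-handling code at the end of this function does.
 */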
1255
1256 static int
1257 dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
1258 {
1259 thread_continue_t func;
1260 u_int32_t limit;
1261 int error;
1262
1263 /* NULL ifp indicates the main input thread, called at dlil_init time */
1264 if (ifp == NULL) {
1265 func = dlil_main_input_thread_func;
1266 VERIFY(inp == dlil_main_input_thread);
1267 (void) strlcat(inp->input_name,
1268 "main_input", DLIL_THREADNAME_LEN);
1269 } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1270 func = dlil_rxpoll_input_thread_func;
1271 VERIFY(inp != dlil_main_input_thread);
1272 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1273 "%s_input_poll", if_name(ifp));
1274 } else {
1275 func = dlil_input_thread_func;
1276 VERIFY(inp != dlil_main_input_thread);
1277 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1278 "%s_input", if_name(ifp));
1279 }
1280 VERIFY(inp->input_thr == THREAD_NULL);
1281
1282 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
1283 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
1284
1285 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1286 inp->ifp = ifp; /* NULL for main input thread */
1287
1288 net_timerclear(&inp->mode_holdtime);
1289 net_timerclear(&inp->mode_lasttime);
1290 net_timerclear(&inp->sample_holdtime);
1291 net_timerclear(&inp->sample_lasttime);
1292 net_timerclear(&inp->dbg_lasttime);
1293
1294 /*
1295 * For interfaces that support opportunistic polling, set the
1296 * low and high watermarks for outstanding inbound packets/bytes.
1297 * Also define freeze times for transitioning between modes
1298 * and updating the average.
1299 */
1300 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1301 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1302 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1303 } else {
1304 limit = (u_int32_t)-1;
1305 }
1306
1307 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1308 if (inp == dlil_main_input_thread) {
1309 struct dlil_main_threading_info *inpm =
1310 (struct dlil_main_threading_info *)inp;
1311 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1312 }
1313
1314 error = kernel_thread_start(func, inp, &inp->input_thr);
1315 if (error == KERN_SUCCESS) {
1316 ml_thread_policy(inp->input_thr, MACHINE_GROUP,
1317 (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_NETISR));
1318 /*
1319 * We create an affinity set so that the matching workloop
1320 * thread or the starter thread (for loopback) can be
1321 * scheduled on the same processor set as the input thread.
1322 */
1323 if (net_affinity) {
1324 struct thread *tp = inp->input_thr;
1325 u_int32_t tag;
1326 /*
1327 * Randomize to reduce the probability
1328 * of affinity tag namespace collision.
1329 */
1330 read_frandom(&tag, sizeof(tag));
1331 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1332 thread_reference(tp);
1333 inp->tag = tag;
1334 inp->net_affinity = TRUE;
1335 }
1336 }
1337 } else if (inp == dlil_main_input_thread) {
1338 panic_plain("%s: couldn't create main input thread", __func__);
1339 /* NOTREACHED */
1340 } else {
1341 panic_plain("%s: couldn't create %s input thread", __func__,
1342 if_name(ifp));
1343 /* NOTREACHED */
1344 }
1345 OSAddAtomic(1, &cur_dlil_input_threads);
1346
1347 return error;
1348 }
1349
1350 #if TEST_INPUT_THREAD_TERMINATION
1351 static int
1352 sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1353 {
1354 #pragma unused(arg1, arg2)
1355 uint32_t i;
1356 int err;
1357
1358 i = if_input_thread_termination_spin;
1359
1360 err = sysctl_handle_int(oidp, &i, 0, req);
1361 if (err != 0 || req->newptr == USER_ADDR_NULL) {
1362 return err;
1363 }
1364
1365 if (net_rxpoll == 0) {
1366 return ENXIO;
1367 }
1368
1369 if_input_thread_termination_spin = i;
1370 return err;
1371 }
1372 #endif /* TEST_INPUT_THREAD_TERMINATION */
1373
1374 static void
1375 dlil_clean_threading_info(struct dlil_threading_info *inp)
1376 {
1377 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1378 lck_grp_free(inp->lck_grp);
1379
1380 inp->input_waiting = 0;
1381 inp->wtot = 0;
1382 bzero(inp->input_name, sizeof(inp->input_name));
1383 inp->ifp = NULL;
1384 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1385 qlimit(&inp->rcvq_pkts) = 0;
1386 bzero(&inp->stats, sizeof(inp->stats));
1387
1388 VERIFY(!inp->net_affinity);
1389 inp->input_thr = THREAD_NULL;
1390 VERIFY(inp->wloop_thr == THREAD_NULL);
1391 VERIFY(inp->poll_thr == THREAD_NULL);
1392 VERIFY(inp->tag == 0);
1393
1394 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1395 bzero(&inp->tstats, sizeof(inp->tstats));
1396 bzero(&inp->pstats, sizeof(inp->pstats));
1397 bzero(&inp->sstats, sizeof(inp->sstats));
1398
1399 net_timerclear(&inp->mode_holdtime);
1400 net_timerclear(&inp->mode_lasttime);
1401 net_timerclear(&inp->sample_holdtime);
1402 net_timerclear(&inp->sample_lasttime);
1403 net_timerclear(&inp->dbg_lasttime);
1404
1405 #if IFNET_INPUT_SANITY_CHK
1406 inp->input_mbuf_cnt = 0;
1407 #endif /* IFNET_INPUT_SANITY_CHK */
1408 }
1409
1410 static void
1411 dlil_terminate_input_thread(struct dlil_threading_info *inp)
1412 {
1413 struct ifnet *ifp = inp->ifp;
1414
1415 VERIFY(current_thread() == inp->input_thr);
1416 VERIFY(inp != dlil_main_input_thread);
1417
1418 OSAddAtomic(-1, &cur_dlil_input_threads);
1419
1420 #if TEST_INPUT_THREAD_TERMINATION
1421 { /* do something useless that won't get optimized away */
1422 uint32_t v = 1;
1423 for (uint32_t i = 0;
1424 i < if_input_thread_termination_spin;
1425 i++) {
1426 v = (i + 1) * v;
1427 }
1428 printf("the value is %d\n", v);
1429 }
1430 #endif /* TEST_INPUT_THREAD_TERMINATION */
1431
1432 lck_mtx_lock_spin(&inp->input_lck);
1433 VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
1434 inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
1435 wakeup_one((caddr_t)&inp->input_waiting);
1436 lck_mtx_unlock(&inp->input_lck);
1437
1438 /* for the extra refcnt from kernel_thread_start() */
1439 thread_deallocate(current_thread());
1440
1441 if (dlil_verbose) {
1442 printf("%s: input thread terminated\n",
1443 if_name(ifp));
1444 }
1445
1446 /* this is the end */
1447 thread_terminate(current_thread());
1448 /* NOTREACHED */
1449 }
1450
1451 static kern_return_t
1452 dlil_affinity_set(struct thread *tp, u_int32_t tag)
1453 {
1454 thread_affinity_policy_data_t policy;
1455
1456 bzero(&policy, sizeof(policy));
1457 policy.affinity_tag = tag;
1458 return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1459 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
1460 }
1461
1462 void
1463 dlil_init(void)
1464 {
1465 thread_t thread = THREAD_NULL;
1466
1467 /*
1468 * The following fields must be 64-bit aligned for atomic operations.
1469 */
1470 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1471 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1472 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1473 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1474 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1475 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1476 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1477 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1478 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1479 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1480 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1481 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1482 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1483 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1484 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1485
1486 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1487 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1488 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1489 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1490 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1491 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1492 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1493 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1494 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1495 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1496 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1497 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1498 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1499 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1500 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1501
1502 /*
1503 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1504 */
1505 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1506 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1507 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1508 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1509 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1510 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1511 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1512 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1513 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
1514 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
1515 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1516 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1517 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1518 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1519
1520 /*
1521 * ... as well as the mbuf checksum flags counterparts.
1522 */
1523 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1524 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1525 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1526 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1527 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1528 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1529 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1530 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1531 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1532 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
1533 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1534
1535 /*
1536 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1537 */
1538 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
1539 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
1540
1541 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1542 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1543 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1544 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1545
1546 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1547 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1548 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1549
1550 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1551 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1552 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1553 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1554 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1555 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1556 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1557 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1558 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1559 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1560 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1561 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1562 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1563 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1564 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1565 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1566
1567 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1568 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1569 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1570 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1571 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1572 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
1573 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
1574
1575 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1576 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1577
1578 PE_parse_boot_argn("net_affinity", &net_affinity,
1579 sizeof(net_affinity));
1580
1581 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));
1582
1583 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));
1584
1585 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));
1586
1587 dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
1588 sizeof(struct dlil_ifnet_dbg);
1589 /* Enforce 64-bit alignment for dlil_ifnet structure */
1590 dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
1591 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
1592 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1593 0, DLIF_ZONE_NAME);
1594 if (dlif_zone == NULL) {
1595 panic_plain("%s: failed allocating %s", __func__,
1596 DLIF_ZONE_NAME);
1597 /* NOTREACHED */
1598 }
1599 zone_change(dlif_zone, Z_EXPAND, TRUE);
1600 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1601
1602 dlif_filt_size = sizeof(struct ifnet_filter);
1603 dlif_filt_zone = zinit(dlif_filt_size,
1604 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1605 if (dlif_filt_zone == NULL) {
1606 panic_plain("%s: failed allocating %s", __func__,
1607 DLIF_FILT_ZONE_NAME);
1608 /* NOTREACHED */
1609 }
1610 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1611 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1612
1613 dlif_phash_size = sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS;
1614 dlif_phash_zone = zinit(dlif_phash_size,
1615 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1616 if (dlif_phash_zone == NULL) {
1617 panic_plain("%s: failed allocating %s", __func__,
1618 DLIF_PHASH_ZONE_NAME);
1619 /* NOTREACHED */
1620 }
1621 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1622 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1623
1624 dlif_proto_size = sizeof(struct if_proto);
1625 dlif_proto_zone = zinit(dlif_proto_size,
1626 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1627 if (dlif_proto_zone == NULL) {
1628 panic_plain("%s: failed allocating %s", __func__,
1629 DLIF_PROTO_ZONE_NAME);
1630 /* NOTREACHED */
1631 }
1632 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1633 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1634
1635 dlif_tcpstat_size = sizeof(struct tcpstat_local);
1636 /* Enforce 64-bit alignment for tcpstat_local structure */
1637 dlif_tcpstat_bufsize =
1638 dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
1639 dlif_tcpstat_bufsize =
1640 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
1641 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1642 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1643 DLIF_TCPSTAT_ZONE_NAME);
1644 if (dlif_tcpstat_zone == NULL) {
1645 panic_plain("%s: failed allocating %s", __func__,
1646 DLIF_TCPSTAT_ZONE_NAME);
1647 /* NOTREACHED */
1648 }
1649 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1650 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1651
1652 dlif_udpstat_size = sizeof(struct udpstat_local);
1653 /* Enforce 64-bit alignment for udpstat_local structure */
1654 dlif_udpstat_bufsize =
1655 dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
1656 dlif_udpstat_bufsize =
1657 P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
1658 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1659 DLIF_UDPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1660 DLIF_UDPSTAT_ZONE_NAME);
1661 if (dlif_udpstat_zone == NULL) {
1662 panic_plain("%s: failed allocating %s", __func__,
1663 DLIF_UDPSTAT_ZONE_NAME);
1664 /* NOTREACHED */
1665 }
1666 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1667 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1668
1669 ifnet_llreach_init();
1670 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
1671
1672 TAILQ_INIT(&dlil_ifnet_head);
1673 TAILQ_INIT(&ifnet_head);
1674 TAILQ_INIT(&ifnet_detaching_head);
1675 TAILQ_INIT(&ifnet_ordered_head);
1676
1677 /* Setup the lock groups we will use */
1678 dlil_grp_attributes = lck_grp_attr_alloc_init();
1679
1680 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
1681 dlil_grp_attributes);
1682 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1683 dlil_grp_attributes);
1684 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1685 dlil_grp_attributes);
1686 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1687 dlil_grp_attributes);
1688 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
1689 dlil_grp_attributes);
1690
1691 /* Setup the lock attributes we will use */
1692 dlil_lck_attributes = lck_attr_alloc_init();
1693
1694 ifnet_lock_attr = lck_attr_alloc_init();
1695
1696 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1697 dlil_lck_attributes);
1698 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1699
1700 /* Setup interface flow control related items */
1701 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
1702
1703 ifnet_fc_zone_size = sizeof(struct ifnet_fc_entry);
1704 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1705 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1706 if (ifnet_fc_zone == NULL) {
1707 panic_plain("%s: failed allocating %s", __func__,
1708 IFNET_FC_ZONE_NAME);
1709 /* NOTREACHED */
1710 }
1711 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1712 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
1713
1714 /* Initialize interface address subsystem */
1715 ifa_init();
1716
1717 #if PF
1718 /* Initialize the packet filter */
1719 pfinit();
1720 #endif /* PF */
1721
1722 /* Initialize queue algorithms */
1723 classq_init();
1724
1725 /* Initialize packet schedulers */
1726 pktsched_init();
1727
1728 /* Initialize flow advisory subsystem */
1729 flowadv_init();
1730
1731 /* Initialize the pktap virtual interface */
1732 pktap_init();
1733
1734 /* Initialize the service class to dscp map */
1735 net_qos_map_init();
1736
1737 /* Initialize the interface port list */
1738 if_ports_used_init();
1739
1740 /* Initialize the interface low power mode event handler */
1741 if_low_power_evhdlr_init();
1742
1743 #if DEBUG || DEVELOPMENT
1744 /* Run self-tests */
1745 dlil_verify_sum16();
1746 #endif /* DEBUG || DEVELOPMENT */
1747
1748 /* Initialize link layer table */
1749 lltable_glbl_init();
1750
1751 /*
1752 * Create and start up the main DLIL input thread and the interface
1753 * detacher thread once everything is initialized.
1754 */
1755 dlil_create_input_thread(NULL, dlil_main_input_thread);
1756
1757 if (kernel_thread_start(ifnet_detacher_thread_func,
1758 NULL, &thread) != KERN_SUCCESS) {
1759 panic_plain("%s: couldn't create detacher thread", __func__);
1760 /* NOTREACHED */
1761 }
1762 thread_deallocate(thread);
1763 }
1764
1765 static void
1766 if_flt_monitor_busy(struct ifnet *ifp)
1767 {
1768 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1769
1770 ++ifp->if_flt_busy;
1771 VERIFY(ifp->if_flt_busy != 0);
1772 }
1773
1774 static void
1775 if_flt_monitor_unbusy(struct ifnet *ifp)
1776 {
1777 if_flt_monitor_leave(ifp);
1778 }
1779
1780 static void
1781 if_flt_monitor_enter(struct ifnet *ifp)
1782 {
1783 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1784
1785 while (ifp->if_flt_busy) {
1786 ++ifp->if_flt_waiters;
1787 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1788 (PZERO - 1), "if_flt_monitor", NULL);
1789 }
1790 if_flt_monitor_busy(ifp);
1791 }
1792
1793 static void
1794 if_flt_monitor_leave(struct ifnet *ifp)
1795 {
1796 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1797
1798 VERIFY(ifp->if_flt_busy != 0);
1799 --ifp->if_flt_busy;
1800
1801 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1802 ifp->if_flt_waiters = 0;
1803 wakeup(&ifp->if_flt_head);
1804 }
1805 }
1806
1807 __private_extern__ int
1808 dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1809 interface_filter_t *filter_ref, u_int32_t flags)
1810 {
1811 int retval = 0;
1812 struct ifnet_filter *filter = NULL;
1813
1814 ifnet_head_lock_shared();
1815 /* Check that the interface is in the global list */
1816 if (!ifnet_lookup(ifp)) {
1817 retval = ENXIO;
1818 goto done;
1819 }
1820
1821 filter = zalloc(dlif_filt_zone);
1822 if (filter == NULL) {
1823 retval = ENOMEM;
1824 goto done;
1825 }
1826 bzero(filter, dlif_filt_size);
1827
1828 /* refcnt held above during lookup */
1829 filter->filt_flags = flags;
1830 filter->filt_ifp = ifp;
1831 filter->filt_cookie = if_filter->iff_cookie;
1832 filter->filt_name = if_filter->iff_name;
1833 filter->filt_protocol = if_filter->iff_protocol;
1834 /*
1835 * Do not install filter callbacks for internal coproc interface
1836 */
1837 if (!IFNET_IS_INTCOPROC(ifp)) {
1838 filter->filt_input = if_filter->iff_input;
1839 filter->filt_output = if_filter->iff_output;
1840 filter->filt_event = if_filter->iff_event;
1841 filter->filt_ioctl = if_filter->iff_ioctl;
1842 }
1843 filter->filt_detached = if_filter->iff_detached;
1844
1845 lck_mtx_lock(&ifp->if_flt_lock);
1846 if_flt_monitor_enter(ifp);
1847
1848 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1849 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
1850
1851 if_flt_monitor_leave(ifp);
1852 lck_mtx_unlock(&ifp->if_flt_lock);
1853
1854 *filter_ref = filter;
1855
1856 /*
1857 * Bump filter count and route_generation ID to let TCP
1858 * know it shouldn't do TSO on this connection
1859 */
1860 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1861 OSAddAtomic(1, &dlil_filter_disable_tso_count);
1862 routegenid_update();
1863 }
1864 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1865 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1866 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1867 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1868 }
1869 if (dlil_verbose) {
1870 printf("%s: %s filter attached\n", if_name(ifp),
1871 if_filter->iff_name);
1872 }
1873 done:
1874 ifnet_head_done();
1875 if (retval != 0 && ifp != NULL) {
1876 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1877 if_name(ifp), if_filter->iff_name, retval);
1878 }
1879 if (retval != 0 && filter != NULL) {
1880 zfree(dlif_filt_zone, filter);
1881 }
1882
1883 return retval;
1884 }
1885
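/*
 * For context, a minimal sketch of how a caller typically reaches
 * dlil_attach_filter(): kernel extensions use the iflt_attach() KPI from
 * <net/kpi_interfacefilter.h>, supplying an iff_filter whose callbacks end
 * up in the ifnet_filter allocated above; the filter is later removed with
 * iflt_detach(), which funnels into dlil_detach_filter().  The callback and
 * helper names below (my_filter_*) are hypothetical.
 */
#if 0	/* illustrative only */
static errno_t
my_filter_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
	/* inspect or modify the inbound packet; return 0 to pass it on */
	return 0;
}

static void
my_filter_detached(void *cookie, ifnet_t ifp)
{
	/* last callback invoked; release any state tied to the cookie */
}

static errno_t
my_filter_register(ifnet_t ifp, interface_filter_t *ref)
{
	struct iff_filter filt = {
		.iff_cookie   = NULL,
		.iff_name     = "com.example.myfilter",
		.iff_protocol = 0,		/* all protocols */
		.iff_input    = my_filter_input,
		.iff_detached = my_filter_detached,
	};

	/* reaches dlil_attach_filter() above */
	return iflt_attach(ifp, &filt, ref);
}
#endif
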
1886 static int
1887 dlil_detach_filter_internal(interface_filter_t filter, int detached)
1888 {
1889 int retval = 0;
1890
1891 if (detached == 0) {
1892 ifnet_t ifp = NULL;
1893
1894 ifnet_head_lock_shared();
1895 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1896 interface_filter_t entry = NULL;
1897
1898 lck_mtx_lock(&ifp->if_flt_lock);
1899 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1900 if (entry != filter || entry->filt_skip) {
1901 continue;
1902 }
1903 /*
1904 * We've found a match; since it's possible
1905 * that the thread gets blocked in the monitor,
1906 * we do the lock dance. Interface should
1907 * not be detached since we still have a use
1908 * count held during filter attach.
1909 */
1910 entry->filt_skip = 1; /* skip input/output */
1911 lck_mtx_unlock(&ifp->if_flt_lock);
1912 ifnet_head_done();
1913
1914 lck_mtx_lock(&ifp->if_flt_lock);
1915 if_flt_monitor_enter(ifp);
1916 LCK_MTX_ASSERT(&ifp->if_flt_lock,
1917 LCK_MTX_ASSERT_OWNED);
1918
1919 /* Remove the filter from the list */
1920 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1921 filt_next);
1922
1923 if_flt_monitor_leave(ifp);
1924 lck_mtx_unlock(&ifp->if_flt_lock);
1925 if (dlil_verbose) {
1926 printf("%s: %s filter detached\n",
1927 if_name(ifp), filter->filt_name);
1928 }
1929 goto destroy;
1930 }
1931 lck_mtx_unlock(&ifp->if_flt_lock);
1932 }
1933 ifnet_head_done();
1934
1935 /* filter parameter is not a valid filter ref */
1936 retval = EINVAL;
1937 goto done;
1938 }
1939
1940 if (dlil_verbose) {
1941 printf("%s filter detached\n", filter->filt_name);
1942 }
1943
1944 destroy:
1945
1946 /* Call the detached function if there is one */
1947 if (filter->filt_detached) {
1948 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
1949 }
1950
1951 /*
1952 * Decrease filter count and route_generation ID to let TCP
1953 * know it should reevaluate whether to do TSO
1954 */
1955 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1956 OSAddAtomic(-1, &dlil_filter_disable_tso_count);
1957 routegenid_update();
1958 }
1959
1960 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
1961
1962 /* Free the filter */
1963 zfree(dlif_filt_zone, filter);
1964 filter = NULL;
1965 done:
1966 if (retval != 0 && filter != NULL) {
1967 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1968 filter->filt_name, retval);
1969 }
1970
1971 return retval;
1972 }
1973
1974 __private_extern__ void
1975 dlil_detach_filter(interface_filter_t filter)
1976 {
1977 if (filter == NULL) {
1978 return;
1979 }
1980 dlil_detach_filter_internal(filter, 0);
1981 }
1982
1983 /*
1984 * Main input thread:
1985 *
1986 * a) handles all inbound packets for lo0
1987 * b) handles all inbound packets for interfaces with no dedicated
1988 * input thread (e.g. anything but Ethernet/PDP or those that support
1989 * opportunistic polling.)
1990 * c) protocol registrations
1991 * d) packet injections
1992 */
1993 __attribute__((noreturn))
1994 static void
1995 dlil_main_input_thread_func(void *v, wait_result_t w)
1996 {
1997 #pragma unused(w)
1998 struct dlil_main_threading_info *inpm = v;
1999 struct dlil_threading_info *inp = v;
2000
2001 VERIFY(inp == dlil_main_input_thread);
2002 VERIFY(inp->ifp == NULL);
2003 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
2004
2005 while (1) {
2006 struct mbuf *m = NULL, *m_loop = NULL;
2007 u_int32_t m_cnt, m_cnt_loop;
2008 boolean_t proto_req;
2009
2010 lck_mtx_lock_spin(&inp->input_lck);
2011
2012 /* Wait until there is work to be done */
2013 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2014 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2015 (void) msleep(&inp->input_waiting, &inp->input_lck,
2016 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2017 }
2018
2019 inp->input_waiting |= DLIL_INPUT_RUNNING;
2020 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2021
2022 /* Main input thread cannot be terminated */
2023 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
2024
2025 proto_req = (inp->input_waiting &
2026 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
2027
2028 /* Packets for non-dedicated interfaces other than lo0 */
2029 m_cnt = qlen(&inp->rcvq_pkts);
2030 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2031
2032 /* Packets exclusive to lo0 */
2033 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
2034 m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);
2035
2036 inp->wtot = 0;
2037
2038 lck_mtx_unlock(&inp->input_lck);
2039
2040 /*
2041 * NOTE warning %%% attention !!!!
2042 * We should think about adding thread-starvation safeguards
2043 * when dealing with long chains of packets.
2044 */
2045 if (m_loop != NULL) {
2046 dlil_input_packet_list_extended(lo_ifp, m_loop,
2047 m_cnt_loop, inp->mode);
2048 }
2049
2050 if (m != NULL) {
2051 dlil_input_packet_list_extended(NULL, m,
2052 m_cnt, inp->mode);
2053 }
2054
2055 if (proto_req) {
2056 proto_input_run();
2057 }
2058 }
2059
2060 /* NOTREACHED */
2061 VERIFY(0); /* we should never get here */
2062 }
2063
2064 /*
2065 * Input thread for interfaces with legacy input model.
2066 */
2067 static void
2068 dlil_input_thread_func(void *v, wait_result_t w)
2069 {
2070 #pragma unused(w)
2071 char thread_name[MAXTHREADNAMESIZE];
2072 struct dlil_threading_info *inp = v;
2073 struct ifnet *ifp = inp->ifp;
2074
2075 /* Construct the name for this thread, and then apply it. */
2076 bzero(thread_name, sizeof(thread_name));
2077 snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
2078 thread_set_thread_name(inp->input_thr, thread_name);
2079
2080 VERIFY(inp != dlil_main_input_thread);
2081 VERIFY(ifp != NULL);
2082 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
2083 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
2084
2085 while (1) {
2086 struct mbuf *m = NULL;
2087 u_int32_t m_cnt;
2088
2089 lck_mtx_lock_spin(&inp->input_lck);
2090
2091 /* Wait until there is work to be done */
2092 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2093 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2094 (void) msleep(&inp->input_waiting, &inp->input_lck,
2095 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2096 }
2097
2098 inp->input_waiting |= DLIL_INPUT_RUNNING;
2099 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2100
2101 /*
2102 * Protocol registration and injection must always use
2103 * the main input thread; in theory the latter could use the
2104 * input thread of the interface where the packet arrived,
2105 * but that requires knowing the interface in advance
2106 * (and the benefits might not be worth the trouble.)
2107 */
2108 VERIFY(!(inp->input_waiting &
2109 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2110
2111 /* Packets for this interface */
2112 m_cnt = qlen(&inp->rcvq_pkts);
2113 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2114
2115 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2116 lck_mtx_unlock(&inp->input_lck);
2117
2118 /* Free up pending packets */
2119 if (m != NULL) {
2120 mbuf_freem_list(m);
2121 }
2122
2123 dlil_terminate_input_thread(inp);
2124 /* NOTREACHED */
2125 return;
2126 }
2127
2128 inp->wtot = 0;
2129
2130 dlil_input_stats_sync(ifp, inp);
2131
2132 lck_mtx_unlock(&inp->input_lck);
2133
2134 /*
2135 * NOTE warning %%% attention !!!!
2136 * We should think about adding thread-starvation safeguards
2137 * when dealing with long chains of packets.
2138 */
2139 if (m != NULL) {
2140 dlil_input_packet_list_extended(NULL, m,
2141 m_cnt, inp->mode);
2142 }
2143 }
2144
2145 /* NOTREACHED */
2146 VERIFY(0); /* we should never get here */
2147 }
2148
2149 /*
2150 * Input thread for interfaces with opportunistic polling input model.
2151 */
2152 static void
2153 dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2154 {
2155 #pragma unused(w)
2156 struct dlil_threading_info *inp = v;
2157 struct ifnet *ifp = inp->ifp;
2158 struct timespec ts;
2159
2160 VERIFY(inp != dlil_main_input_thread);
2161 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
2162
2163 while (1) {
2164 struct mbuf *m = NULL;
2165 u_int32_t m_cnt, m_size, poll_req = 0;
2166 ifnet_model_t mode;
2167 struct timespec now, delta;
2168 u_int64_t ival;
2169
2170 lck_mtx_lock_spin(&inp->input_lck);
2171
2172 if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
2173 ival = IF_RXPOLL_INTERVALTIME_MIN;
2174 }
2175
2176 /* Link parameters changed? */
2177 if (ifp->if_poll_update != 0) {
2178 ifp->if_poll_update = 0;
2179 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
2180 }
2181
2182 /* Current operating mode */
2183 mode = inp->mode;
2184
2185 /* Wait until there is work to be done */
2186 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2187 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2188 (void) msleep(&inp->input_waiting, &inp->input_lck,
2189 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2190 }
2191
2192 inp->input_waiting |= DLIL_INPUT_RUNNING;
2193 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2194
2195 /*
2196 * Protocol registration and injection must always use
2197 * the main input thread; in theory the latter could use the
2198 * input thread of the interface where the packet arrived,
2199 * but that requires knowing the interface in advance
2200 * (and the benefits might not be worth the trouble.)
2201 */
2202 VERIFY(!(inp->input_waiting &
2203 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));
2204
2205 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2206 /* Free up pending packets */
2207 lck_mtx_convert_spin(&inp->input_lck);
2208 _flushq(&inp->rcvq_pkts);
2209 if (inp->input_mit_tcall != NULL) {
2210 if (thread_call_isactive(inp->input_mit_tcall)) {
2211 thread_call_cancel(inp->input_mit_tcall);
2212 }
2213 }
2214 lck_mtx_unlock(&inp->input_lck);
2215
2216 dlil_terminate_input_thread(inp);
2217 /* NOTREACHED */
2218 return;
2219 }
2220
2221 /* Total count of all packets */
2222 m_cnt = qlen(&inp->rcvq_pkts);
2223
2224 /* Total bytes of all packets */
2225 m_size = qsize(&inp->rcvq_pkts);
2226
2227 /* Packets for this interface */
2228 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2229 VERIFY(m != NULL || m_cnt == 0);
2230
2231 nanouptime(&now);
2232 if (!net_timerisset(&inp->sample_lasttime)) {
2233 *(&inp->sample_lasttime) = *(&now);
2234 }
2235
2236 net_timersub(&now, &inp->sample_lasttime, &delta);
2237 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
2238 u_int32_t ptot, btot;
2239
2240 /* Accumulate statistics for current sampling */
2241 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
2242
2243 if (net_timercmp(&delta, &inp->sample_holdtime, <)) {
2244 goto skip;
2245 }
2246
2247 *(&inp->sample_lasttime) = *(&now);
2248
2249 /* Calculate min/max of inbound bytes */
2250 btot = (u_int32_t)inp->sstats.bytes;
2251 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot) {
2252 inp->rxpoll_bmin = btot;
2253 }
2254 if (btot > inp->rxpoll_bmax) {
2255 inp->rxpoll_bmax = btot;
2256 }
2257
2258 /* Calculate EWMA of inbound bytes */
2259 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
2260
2261 /* Calculate min/max of inbound packets */
2262 ptot = (u_int32_t)inp->sstats.packets;
2263 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot) {
2264 inp->rxpoll_pmin = ptot;
2265 }
2266 if (ptot > inp->rxpoll_pmax) {
2267 inp->rxpoll_pmax = ptot;
2268 }
2269
2270 /* Calculate EWMA of inbound packets */
2271 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
2272
2273 /* Reset sampling statistics */
2274 PKTCNTR_CLEAR(&inp->sstats);
2275
2276 /* Calculate EWMA of wakeup requests */
2277 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2278 inp->wtot = 0;
2279
2280 if (dlil_verbose) {
2281 if (!net_timerisset(&inp->dbg_lasttime)) {
2282 *(&inp->dbg_lasttime) = *(&now);
2283 }
2284 net_timersub(&now, &inp->dbg_lasttime, &delta);
2285 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2286 *(&inp->dbg_lasttime) = *(&now);
2287 printf("%s: [%s] pkts avg %d max %d "
2288 "limits [%d/%d], wreq avg %d "
2289 "limits [%d/%d], bytes avg %d "
2290 "limits [%d/%d]\n", if_name(ifp),
2291 (inp->mode ==
2292 IFNET_MODEL_INPUT_POLL_ON) ?
2293 "ON" : "OFF", inp->rxpoll_pavg,
2294 inp->rxpoll_pmax,
2295 inp->rxpoll_plowat,
2296 inp->rxpoll_phiwat,
2297 inp->rxpoll_wavg,
2298 inp->rxpoll_wlowat,
2299 inp->rxpoll_whiwat,
2300 inp->rxpoll_bavg,
2301 inp->rxpoll_blowat,
2302 inp->rxpoll_bhiwat);
2303 }
2304 }
2305
2306 /* Perform mode transition, if necessary */
2307 if (!net_timerisset(&inp->mode_lasttime)) {
2308 *(&inp->mode_lasttime) = *(&now);
2309 }
2310
2311 net_timersub(&now, &inp->mode_lasttime, &delta);
2312 if (net_timercmp(&delta, &inp->mode_holdtime, <)) {
2313 goto skip;
2314 }
2315
2316 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
2317 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
2318 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
2319 mode = IFNET_MODEL_INPUT_POLL_OFF;
2320 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
2321 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
2322 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
2323 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
2324 mode = IFNET_MODEL_INPUT_POLL_ON;
2325 }
2326
2327 if (mode != inp->mode) {
2328 inp->mode = mode;
2329 *(&inp->mode_lasttime) = *(&now);
2330 poll_req++;
2331 }
2332 }
2333 skip:
2334 dlil_input_stats_sync(ifp, inp);
2335
2336 lck_mtx_unlock(&inp->input_lck);
2337
2338 /*
2339 * If there's a mode change and the interface is still attached,
2340 * perform a downcall to the driver for the new mode. Also
2341 * hold an IO refcnt on the interface to prevent it from
2342 * being detached (will be released below.)
2343 */
2344 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2345 struct ifnet_model_params p = { mode, { 0 } };
2346 errno_t err;
2347
2348 if (dlil_verbose) {
2349 printf("%s: polling is now %s, "
2350 "pkts avg %d max %d limits [%d/%d], "
2351 "wreq avg %d limits [%d/%d], "
2352 "bytes avg %d limits [%d/%d]\n",
2353 if_name(ifp),
2354 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2355 "ON" : "OFF", inp->rxpoll_pavg,
2356 inp->rxpoll_pmax, inp->rxpoll_plowat,
2357 inp->rxpoll_phiwat, inp->rxpoll_wavg,
2358 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
2359 inp->rxpoll_bavg, inp->rxpoll_blowat,
2360 inp->rxpoll_bhiwat);
2361 }
2362
2363 if ((err = ((*ifp->if_input_ctl)(ifp,
2364 IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
2365 printf("%s: error setting polling mode "
2366 "to %s (%d)\n", if_name(ifp),
2367 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2368 "ON" : "OFF", err);
2369 }
2370
2371 switch (mode) {
2372 case IFNET_MODEL_INPUT_POLL_OFF:
2373 ifnet_set_poll_cycle(ifp, NULL);
2374 inp->rxpoll_offreq++;
2375 if (err != 0) {
2376 inp->rxpoll_offerr++;
2377 }
2378 break;
2379
2380 case IFNET_MODEL_INPUT_POLL_ON:
2381 net_nsectimer(&ival, &ts);
2382 ifnet_set_poll_cycle(ifp, &ts);
2383 ifnet_poll(ifp);
2384 inp->rxpoll_onreq++;
2385 if (err != 0) {
2386 inp->rxpoll_onerr++;
2387 }
2388 break;
2389
2390 default:
2391 VERIFY(0);
2392 /* NOTREACHED */
2393 }
2394
2395 /* Release the IO refcnt */
2396 ifnet_decr_iorefcnt(ifp);
2397 }
2398
2399 /*
2400 * NOTE warning %%% attention !!!!
2401 * We should think about adding thread-starvation safeguards
2402 * when dealing with long chains of packets.
2403 */
2404 if (m != NULL) {
2405 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2406 }
2407 }
2408
2409 /* NOTREACHED */
2410 VERIFY(0); /* we should never get here */
2411 }
2412
2413 /*
2414 * Must be called on an attached ifnet (caller is expected to check.)
2415 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2416 */
2417 errno_t
2418 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2419 boolean_t locked)
2420 {
2421 struct dlil_threading_info *inp;
2422 u_int64_t sample_holdtime, inbw;
2423
2424 VERIFY(ifp != NULL);
2425 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2426 return ENXIO;
2427 }
2428
2429 if (p != NULL) {
2430 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2431 (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
2432 return EINVAL;
2433 }
2434 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2435 p->packets_lowat >= p->packets_hiwat) {
2436 return EINVAL;
2437 }
2438 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2439 (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
2440 return EINVAL;
2441 }
2442 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2443 p->bytes_lowat >= p->bytes_hiwat) {
2444 return EINVAL;
2445 }
2446 if (p->interval_time != 0 &&
2447 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
2448 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2449 }
2450 }
2451
2452 if (!locked) {
2453 lck_mtx_lock(&inp->input_lck);
2454 }
2455
2456 LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
2457
2458 /*
2459 * Normally, we'd reset the parameters to the auto-tuned values
2460 * if the input thread detects a change in link rate. If the
2461 * driver provides its own parameters right after the link rate
2462 * changes, but before the input thread gets to run, we want to
2463 * make sure to keep the driver's values. Clearing if_poll_update
2464 * will achieve that.
2465 */
2466 if (p != NULL && !locked && ifp->if_poll_update != 0) {
2467 ifp->if_poll_update = 0;
2468 }
2469
2470 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
2471 sample_holdtime = 0; /* polling is disabled */
2472 inp->rxpoll_wlowat = inp->rxpoll_plowat =
2473 inp->rxpoll_blowat = 0;
2474 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
2475 inp->rxpoll_bhiwat = (u_int32_t)-1;
2476 inp->rxpoll_plim = 0;
2477 inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
2478 } else {
2479 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2480 u_int64_t ival;
2481 unsigned int n, i;
2482
2483 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
2484 if (inbw < rxpoll_tbl[i].speed) {
2485 break;
2486 }
2487 n = i;
2488 }
2489 /* auto-tune if caller didn't specify a value */
2490 plowat = ((p == NULL || p->packets_lowat == 0) ?
2491 rxpoll_tbl[n].plowat : p->packets_lowat);
2492 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2493 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2494 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2495 rxpoll_tbl[n].blowat : p->bytes_lowat);
2496 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2497 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2498 plim = ((p == NULL || p->packets_limit == 0) ?
2499 if_rxpoll_max : p->packets_limit);
2500 ival = ((p == NULL || p->interval_time == 0) ?
2501 if_rxpoll_interval_time : p->interval_time);
2502
2503 VERIFY(plowat != 0 && phiwat != 0);
2504 VERIFY(blowat != 0 && bhiwat != 0);
2505 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2506
2507 sample_holdtime = if_rxpoll_sample_holdtime;
2508 inp->rxpoll_wlowat = if_rxpoll_wlowat;
2509 inp->rxpoll_whiwat = if_rxpoll_whiwat;
2510 inp->rxpoll_plowat = plowat;
2511 inp->rxpoll_phiwat = phiwat;
2512 inp->rxpoll_blowat = blowat;
2513 inp->rxpoll_bhiwat = bhiwat;
2514 inp->rxpoll_plim = plim;
2515 inp->rxpoll_ival = ival;
2516 }
2517
2518 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
2519 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
2520
2521 if (dlil_verbose) {
2522 printf("%s: speed %llu bps, sample per %llu nsec, "
2523 "poll interval %llu nsec, pkts per poll %u, "
2524 "pkt limits [%u/%u], wreq limits [%u/%u], "
2525 "bytes limits [%u/%u]\n", if_name(ifp),
2526 inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
2527 inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
2528 inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
2529 }
2530
2531 if (!locked) {
2532 lck_mtx_unlock(&inp->input_lck);
2533 }
2534
2535 return 0;
2536 }
2537
2538 /*
2539 * Must be called on an attached ifnet (caller is expected to check.)
2540 */
2541 errno_t
2542 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2543 {
2544 struct dlil_threading_info *inp;
2545
2546 VERIFY(ifp != NULL && p != NULL);
2547 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2548 return ENXIO;
2549 }
2550
2551 bzero(p, sizeof(*p));
2552
2553 lck_mtx_lock(&inp->input_lck);
2554 p->packets_limit = inp->rxpoll_plim;
2555 p->packets_lowat = inp->rxpoll_plowat;
2556 p->packets_hiwat = inp->rxpoll_phiwat;
2557 p->bytes_lowat = inp->rxpoll_blowat;
2558 p->bytes_hiwat = inp->rxpoll_bhiwat;
2559 p->interval_time = inp->rxpoll_ival;
2560 lck_mtx_unlock(&inp->input_lck);
2561
2562 return 0;
2563 }
2564
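/*
 * A minimal sketch of how a driver that opted into IFEF_RXPOLL might
 * override the auto-tuned polling parameters via the routine above; the
 * values below are illustrative only, and the interface is assumed to be
 * attached.  Passing NULL reverts to auto-tuning from the link rate table.
 */
#if 0	/* illustrative only */
static void
example_rxpoll_tune(struct ifnet *ifp)
{
	struct ifnet_poll_params p;

	bzero(&p, sizeof(p));
	p.packets_lowat = 8;		/* leave poll mode below this */
	p.packets_hiwat = 64;		/* enter poll mode above this */
	p.bytes_lowat   = 2 * 1024;
	p.bytes_hiwat   = 64 * 1024;
	/* nanoseconds; raised to IF_RXPOLL_INTERVALTIME_MIN if too small */
	p.interval_time = 1000 * 1000;

	(void) dlil_rxpoll_set_params(ifp, &p, FALSE);

	/* later: revert to auto-tuned values */
	(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
}
#endif
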
2565 errno_t
2566 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2567 const struct ifnet_stat_increment_param *s)
2568 {
2569 return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
2570 }
2571
2572 errno_t
2573 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2574 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2575 {
2576 return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
2577 }
2578
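/*
 * A minimal sketch of the receive hand-off these two entry points expect
 * from a driver: a chain of packets linked through m_nextpkt, and (for the
 * extended variant) a stat increment block whose packets_in count matches
 * the chain.  The helper name is hypothetical.
 */
#if 0	/* illustrative only */
static void
example_deliver_rx_chain(struct ifnet *ifp, struct mbuf *head,
    struct mbuf *tail, u_int32_t cnt, u_int32_t bytes)
{
	struct ifnet_stat_increment_param s;

	bzero(&s, sizeof(s));
	s.packets_in = cnt;	/* must equal the number of mbufs in the chain */
	s.bytes_in = bytes;	/* approximate; may include link-layer headers */

	/* consumes the chain regardless of the return value */
	(void) ifnet_input_extended(ifp, head, tail, &s);
}
#endif
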
2579 static errno_t
2580 ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2581 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2582 {
2583 dlil_input_func input_func;
2584 struct ifnet_stat_increment_param _s;
2585 u_int32_t m_cnt = 0, m_size = 0;
2586 struct mbuf *last;
2587 errno_t err = 0;
2588
2589 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2590 if (m_head != NULL) {
2591 mbuf_freem_list(m_head);
2592 }
2593 return EINVAL;
2594 }
2595
2596 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2597 VERIFY(m_tail == NULL || ext);
2598 VERIFY(s != NULL || !ext);
2599
2600 /*
2601 * Drop the packet(s) if the parameters are invalid, or if the
2602 * interface is no longer attached; else hold an IO refcnt to
2603 * prevent it from being detached (will be released below.)
2604 */
2605 if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
2606 if (m_head != NULL) {
2607 mbuf_freem_list(m_head);
2608 }
2609 return EINVAL;
2610 }
2611
2612 input_func = ifp->if_input_dlil;
2613 VERIFY(input_func != NULL);
2614
2615 if (m_tail == NULL) {
2616 last = m_head;
2617 while (m_head != NULL) {
2618 #if IFNET_INPUT_SANITY_CHK
2619 if (dlil_input_sanity_check != 0) {
2620 DLIL_INPUT_CHECK(last, ifp);
2621 }
2622 #endif /* IFNET_INPUT_SANITY_CHK */
2623 m_cnt++;
2624 m_size += m_length(last);
2625 if (mbuf_nextpkt(last) == NULL) {
2626 break;
2627 }
2628 last = mbuf_nextpkt(last);
2629 }
2630 m_tail = last;
2631 } else {
2632 #if IFNET_INPUT_SANITY_CHK
2633 if (dlil_input_sanity_check != 0) {
2634 last = m_head;
2635 while (1) {
2636 DLIL_INPUT_CHECK(last, ifp);
2637 m_cnt++;
2638 m_size += m_length(last);
2639 if (mbuf_nextpkt(last) == NULL) {
2640 break;
2641 }
2642 last = mbuf_nextpkt(last);
2643 }
2644 } else {
2645 m_cnt = s->packets_in;
2646 m_size = s->bytes_in;
2647 last = m_tail;
2648 }
2649 #else
2650 m_cnt = s->packets_in;
2651 m_size = s->bytes_in;
2652 last = m_tail;
2653 #endif /* IFNET_INPUT_SANITY_CHK */
2654 }
2655
2656 if (last != m_tail) {
2657 panic_plain("%s: invalid input packet chain for %s, "
2658 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2659 m_tail, last);
2660 }
2661
2662 /*
2663 * Assert packet count only for the extended variant, for backwards
2664 * compatibility, since this came directly from the device driver.
2665 * Relax this assertion for input bytes, as the driver may have
2666 * included the link-layer headers in the computation; hence
2667 * m_size is just an approximation.
2668 */
2669 if (ext && s->packets_in != m_cnt) {
2670 panic_plain("%s: input packet count mismatch for %s, "
2671 "%d instead of %d\n", __func__, if_name(ifp),
2672 s->packets_in, m_cnt);
2673 }
2674
2675 if (s == NULL) {
2676 bzero(&_s, sizeof(_s));
2677 s = &_s;
2678 } else {
2679 _s = *s;
2680 }
2681 _s.packets_in = m_cnt;
2682 _s.bytes_in = m_size;
2683
2684 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
2685
2686 if (ifp != lo_ifp) {
2687 /* Release the IO refcnt */
2688 ifnet_decr_iorefcnt(ifp);
2689 }
2690
2691 return err;
2692 }
2693
2694
2695 errno_t
2696 dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2697 {
2698 return ifp->if_output(ifp, m);
2699 }
2700
2701 errno_t
2702 dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2703 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2704 boolean_t poll, struct thread *tp)
2705 {
2706 struct dlil_threading_info *inp;
2707 u_int32_t m_cnt = s->packets_in;
2708 u_int32_t m_size = s->bytes_in;
2709
2710 if ((inp = ifp->if_inp) == NULL) {
2711 inp = dlil_main_input_thread;
2712 }
2713
2714 /*
2715 * If there is a matching DLIL input thread associated with an
2716 * affinity set, associate this thread with the same set. We
2717 * will only do this once.
2718 */
2719 lck_mtx_lock_spin(&inp->input_lck);
2720 if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
2721 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2722 (poll && inp->poll_thr == THREAD_NULL))) {
2723 u_int32_t tag = inp->tag;
2724
2725 if (poll) {
2726 VERIFY(inp->poll_thr == THREAD_NULL);
2727 inp->poll_thr = tp;
2728 } else {
2729 VERIFY(inp->wloop_thr == THREAD_NULL);
2730 inp->wloop_thr = tp;
2731 }
2732 lck_mtx_unlock(&inp->input_lck);
2733
2734 /* Associate the current thread with the new affinity tag */
2735 (void) dlil_affinity_set(tp, tag);
2736
2737 /*
2738 * Take a reference on the current thread; during detach,
2739 * we will need to refer to it in order to tear down its
2740 * affinity.
2741 */
2742 thread_reference(tp);
2743 lck_mtx_lock_spin(&inp->input_lck);
2744 }
2745
2746 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2747
2748 /*
2749 * Because of loopbacked multicast we cannot stuff the ifp in
2750 * the rcvif of the packet header: loopback (lo0) packets use a
2751 * dedicated list so that we can later associate them with lo_ifp
2752 * on their way up the stack. Packets for other interfaces without
2753 * dedicated input threads go to the regular list.
2754 */
2755 if (m_head != NULL) {
2756 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2757 struct dlil_main_threading_info *inpm =
2758 (struct dlil_main_threading_info *)inp;
2759 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
2760 m_cnt, m_size);
2761 } else {
2762 _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
2763 m_cnt, m_size);
2764 }
2765 }
2766
2767 #if IFNET_INPUT_SANITY_CHK
2768 if (dlil_input_sanity_check != 0) {
2769 u_int32_t count;
2770 struct mbuf *m0;
2771
2772 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0)) {
2773 count++;
2774 }
2775
2776 if (count != m_cnt) {
2777 panic_plain("%s: invalid packet count %d "
2778 "(expected %d)\n", if_name(ifp),
2779 count, m_cnt);
2780 /* NOTREACHED */
2781 }
2782
2783 inp->input_mbuf_cnt += m_cnt;
2784 }
2785 #endif /* IFNET_INPUT_SANITY_CHK */
2786
2787 dlil_input_stats_add(s, inp, poll);
2788 /*
2789 * If we're using the main input thread, synchronize the
2790 * stats now since we have the interface context. All
2791 * other cases involving dedicated input threads will
2792 * have their stats synchronized there.
2793 */
2794 if (inp == dlil_main_input_thread) {
2795 dlil_input_stats_sync(ifp, inp);
2796 }
2797
2798 if (inp->input_mit_tcall &&
2799 qlen(&inp->rcvq_pkts) >= dlil_rcv_mit_pkts_min &&
2800 qlen(&inp->rcvq_pkts) < dlil_rcv_mit_pkts_max &&
2801 (ifp->if_family == IFNET_FAMILY_ETHERNET ||
2802 ifp->if_type == IFT_CELLULAR)
2803 ) {
2804 if (!thread_call_isactive(inp->input_mit_tcall)) {
2805 uint64_t deadline;
2806 clock_interval_to_deadline(dlil_rcv_mit_interval,
2807 1, &deadline);
2808 (void) thread_call_enter_delayed(
2809 inp->input_mit_tcall, deadline);
2810 }
2811 } else {
2812 inp->input_waiting |= DLIL_INPUT_WAITING;
2813 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2814 inp->wtot++;
2815 wakeup_one((caddr_t)&inp->input_waiting);
2816 }
2817 }
2818 lck_mtx_unlock(&inp->input_lck);
2819
2820 return 0;
2821 }
2822
2823
2824 static void
2825 ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
2826 {
2827 if (!(ifp->if_eflags & IFEF_TXSTART)) {
2828 return;
2829 }
2830 /*
2831 * If the starter thread is inactive, signal it to do work,
2832 * unless the interface is being flow controlled from below,
2833 * e.g. a virtual interface being flow controlled by a real
2834 * network interface beneath it, or it's been disabled via
2835 * a call to ifnet_disable_output().
2836 */
2837 lck_mtx_lock_spin(&ifp->if_start_lock);
2838 if (resetfc) {
2839 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
2840 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
2841 lck_mtx_unlock(&ifp->if_start_lock);
2842 return;
2843 }
2844 ifp->if_start_req++;
2845 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
2846 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
2847 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
2848 ifp->if_start_delayed == 0)) {
2849 (void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
2850 ifp->if_start_thread);
2851 }
2852 lck_mtx_unlock(&ifp->if_start_lock);
2853 }
2854
2855 void
2856 ifnet_start(struct ifnet *ifp)
2857 {
2858 ifnet_start_common(ifp, FALSE);
2859 }
2860
2861 static void
2862 ifnet_start_thread_fn(void *v, wait_result_t w)
2863 {
2864 #pragma unused(w)
2865 struct ifnet *ifp = v;
2866 char ifname[IFNAMSIZ + 1];
2867 char thread_name[MAXTHREADNAMESIZE];
2868 struct timespec *ts = NULL;
2869 struct ifclassq *ifq = &ifp->if_snd;
2870 struct timespec delay_start_ts;
2871
2872 /* Construct the name for this thread, and then apply it. */
2873 bzero(thread_name, sizeof(thread_name));
2874 (void) snprintf(thread_name, sizeof(thread_name),
2875 "ifnet_start_%s", ifp->if_xname);
2876 thread_set_thread_name(ifp->if_start_thread, thread_name);
2877
2878 /*
2879 * Treat the dedicated starter thread for lo0 as equivalent to
2880 * the driver workloop thread; if net_affinity is enabled for
2881 * the main input thread, associate this starter thread with it
2882 * by binding them with the same affinity tag. This is done
2883 * only once (as we only have one lo_ifp which never goes away.)
2884 */
2885 if (ifp == lo_ifp) {
2886 struct dlil_threading_info *inp = dlil_main_input_thread;
2887 struct thread *tp = current_thread();
2888
2889 lck_mtx_lock(&inp->input_lck);
2890 if (inp->net_affinity) {
2891 u_int32_t tag = inp->tag;
2892
2893 VERIFY(inp->wloop_thr == THREAD_NULL);
2894 VERIFY(inp->poll_thr == THREAD_NULL);
2895 inp->wloop_thr = tp;
2896 lck_mtx_unlock(&inp->input_lck);
2897
2898 /* Associate this thread with the affinity tag */
2899 (void) dlil_affinity_set(tp, tag);
2900 } else {
2901 lck_mtx_unlock(&inp->input_lck);
2902 }
2903 }
2904
2905 (void) snprintf(ifname, sizeof(ifname), "%s_starter", if_name(ifp));
2906
2907 lck_mtx_lock_spin(&ifp->if_start_lock);
2908
2909 for (;;) {
2910 if (ifp->if_start_thread != NULL) {
2911 (void) msleep(&ifp->if_start_thread,
2912 &ifp->if_start_lock,
2913 (PZERO - 1) | PSPIN, ifname, ts);
2914 }
2915 /* interface is detached? */
2916 if (ifp->if_start_thread == THREAD_NULL) {
2917 ifnet_set_start_cycle(ifp, NULL);
2918 lck_mtx_unlock(&ifp->if_start_lock);
2919 ifnet_purge(ifp);
2920
2921 if (dlil_verbose) {
2922 printf("%s: starter thread terminated\n",
2923 if_name(ifp));
2924 }
2925
2926 /* for the extra refcnt from kernel_thread_start() */
2927 thread_deallocate(current_thread());
2928 /* this is the end */
2929 thread_terminate(current_thread());
2930 /* NOTREACHED */
2931 return;
2932 }
2933
2934 ifp->if_start_active = 1;
2935
2936 for (;;) {
2937 u_int32_t req = ifp->if_start_req;
2938 if (!IFCQ_IS_EMPTY(ifq) &&
2939 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
2940 ifp->if_start_delayed == 0 &&
2941 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
2942 (ifp->if_eflags & IFEF_DELAY_START)) {
2943 ifp->if_start_delayed = 1;
2944 ifnet_start_delayed++;
2945 break;
2946 } else {
2947 ifp->if_start_delayed = 0;
2948 }
2949 lck_mtx_unlock(&ifp->if_start_lock);
2950
2951 /*
2952 * If no longer attached, don't call start because ifp
2953 * is being destroyed; else hold an IO refcnt to
2954 * prevent the interface from being detached (will be
2955 * released below.)
2956 */
2957 if (!ifnet_is_attached(ifp, 1)) {
2958 lck_mtx_lock_spin(&ifp->if_start_lock);
2959 break;
2960 }
2961
2962 /* invoke the driver's start routine */
2963 ((*ifp->if_start)(ifp));
2964
2965 /*
2966 * Release the io ref count taken by ifnet_is_attached.
2967 */
2968 ifnet_decr_iorefcnt(ifp);
2969
2970 lck_mtx_lock_spin(&ifp->if_start_lock);
2971
2972 /*
2973 * If there's no pending request or if the
2974 * interface has been disabled, we're done.
2975 */
2976 if (req == ifp->if_start_req ||
2977 (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
2978 break;
2979 }
2980 }
2981
2982 ifp->if_start_req = 0;
2983 ifp->if_start_active = 0;
2984
2985 /*
2986 * Wake up N ns from now if rate-controlled by TBR, and if
2987 * there are still packets in the send queue which haven't
2988 * been dequeued so far; else sleep indefinitely (ts = NULL)
2989 * until ifnet_start() is called again.
2990 */
2991 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2992 &ifp->if_start_cycle : NULL);
2993
2994 if (ts == NULL && ifp->if_start_delayed == 1) {
2995 delay_start_ts.tv_sec = 0;
2996 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
2997 ts = &delay_start_ts;
2998 }
2999
3000 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
3001 ts = NULL;
3002 }
3003 }
3004
3005 /* NOTREACHED */
3006 }
3007
3008 void
3009 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
3010 {
3011 if (ts == NULL) {
3012 bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
3013 } else {
3014 *(&ifp->if_start_cycle) = *ts;
3015 }
3016
3017 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
3018 printf("%s: restart interval set to %lu nsec\n",
3019 if_name(ifp), ts->tv_nsec);
3020 }
3021 }
3022
3023 static void
3024 ifnet_poll(struct ifnet *ifp)
3025 {
3026 /*
3027 * If the poller thread is inactive, signal it to do work.
3028 */
3029 lck_mtx_lock_spin(&ifp->if_poll_lock);
3030 ifp->if_poll_req++;
3031 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
3032 wakeup_one((caddr_t)&ifp->if_poll_thread);
3033 }
3034 lck_mtx_unlock(&ifp->if_poll_lock);
3035 }
3036
3037 static void
3038 ifnet_poll_thread_fn(void *v, wait_result_t w)
3039 {
3040 #pragma unused(w)
3041 struct dlil_threading_info *inp;
3042 struct ifnet *ifp = v;
3043 char ifname[IFNAMSIZ + 1];
3044 struct timespec *ts = NULL;
3045 struct ifnet_stat_increment_param s;
3046
3047 snprintf(ifname, sizeof(ifname), "%s_poller",
3048 if_name(ifp));
3049 bzero(&s, sizeof(s));
3050
3051 lck_mtx_lock_spin(&ifp->if_poll_lock);
3052
3053 inp = ifp->if_inp;
3054 VERIFY(inp != NULL);
3055
3056 for (;;) {
3057 if (ifp->if_poll_thread != THREAD_NULL) {
3058 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
3059 (PZERO - 1) | PSPIN, ifname, ts);
3060 }
3061
3062 /* interface is detached (maybe while asleep)? */
3063 if (ifp->if_poll_thread == THREAD_NULL) {
3064 ifnet_set_poll_cycle(ifp, NULL);
3065 lck_mtx_unlock(&ifp->if_poll_lock);
3066
3067 if (dlil_verbose) {
3068 printf("%s: poller thread terminated\n",
3069 if_name(ifp));
3070 }
3071
3072 /* for the extra refcnt from kernel_thread_start() */
3073 thread_deallocate(current_thread());
3074 /* this is the end */
3075 thread_terminate(current_thread());
3076 /* NOTREACHED */
3077 return;
3078 }
3079
3080 ifp->if_poll_active = 1;
3081 for (;;) {
3082 struct mbuf *m_head, *m_tail;
3083 u_int32_t m_lim, m_cnt, m_totlen;
3084 u_int16_t req = ifp->if_poll_req;
3085
3086 lck_mtx_unlock(&ifp->if_poll_lock);
3087
3088 /*
3089 * If no longer attached, there's nothing to do;
3090 * else hold an IO refcnt to prevent the interface
3091 * from being detached (will be released below.)
3092 */
3093 if (!ifnet_is_attached(ifp, 1)) {
3094 lck_mtx_lock_spin(&ifp->if_poll_lock);
3095 break;
3096 }
3097
3098 m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
3099 MAX((qlimit(&inp->rcvq_pkts)),
3100 (inp->rxpoll_phiwat << 2));
3101
3102 if (dlil_verbose > 1) {
3103 printf("%s: polling up to %d pkts, "
3104 "pkts avg %d max %d, wreq avg %d, "
3105 "bytes avg %d\n",
3106 if_name(ifp), m_lim,
3107 inp->rxpoll_pavg, inp->rxpoll_pmax,
3108 inp->rxpoll_wavg, inp->rxpoll_bavg);
3109 }
3110
3111 /* invoke the driver's input poll routine */
3112 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
3113 &m_cnt, &m_totlen));
3114
3115 if (m_head != NULL) {
3116 VERIFY(m_tail != NULL && m_cnt > 0);
3117
3118 if (dlil_verbose > 1) {
3119 printf("%s: polled %d pkts, "
3120 "pkts avg %d max %d, wreq avg %d, "
3121 "bytes avg %d\n",
3122 if_name(ifp), m_cnt,
3123 inp->rxpoll_pavg, inp->rxpoll_pmax,
3124 inp->rxpoll_wavg, inp->rxpoll_bavg);
3125 }
3126
3127 /* stats are required for extended variant */
3128 s.packets_in = m_cnt;
3129 s.bytes_in = m_totlen;
3130
3131 (void) ifnet_input_common(ifp, m_head, m_tail,
3132 &s, TRUE, TRUE);
3133 } else {
3134 if (dlil_verbose > 1) {
3135 printf("%s: no packets, "
3136 "pkts avg %d max %d, wreq avg %d, "
3137 "bytes avg %d\n",
3138 if_name(ifp), inp->rxpoll_pavg,
3139 inp->rxpoll_pmax, inp->rxpoll_wavg,
3140 inp->rxpoll_bavg);
3141 }
3142
3143 (void) ifnet_input_common(ifp, NULL, NULL,
3144 NULL, FALSE, TRUE);
3145 }
3146
3147 /* Release the io ref count */
3148 ifnet_decr_iorefcnt(ifp);
3149
3150 lck_mtx_lock_spin(&ifp->if_poll_lock);
3151
3152 /* if there's no pending request, we're done */
3153 if (req == ifp->if_poll_req) {
3154 break;
3155 }
3156 }
3157 ifp->if_poll_req = 0;
3158 ifp->if_poll_active = 0;
3159
3160 /*
3161 * Wake up N ns from now, else sleep indefinitely (ts = NULL)
3162 * until ifnet_poll() is called again.
3163 */
3164 ts = &ifp->if_poll_cycle;
3165 if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
3166 ts = NULL;
3167 }
3168 }
3169
3170 /* NOTREACHED */
3171 }
3172
3173 void
3174 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
3175 {
3176 if (ts == NULL) {
3177 bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
3178 } else {
3179 *(&ifp->if_poll_cycle) = *ts;
3180 }
3181
3182 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
3183 printf("%s: poll interval set to %lu nsec\n",
3184 if_name(ifp), ts->tv_nsec);
3185 }
3186 }
3187
3188 void
3189 ifnet_purge(struct ifnet *ifp)
3190 {
3191 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
3192 if_qflush(ifp, 0);
3193 }
3194 }
3195
3196 void
3197 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
3198 {
3199 IFCQ_LOCK_ASSERT_HELD(ifq);
3200
3201 if (!(IFCQ_IS_READY(ifq))) {
3202 return;
3203 }
3204
3205 if (IFCQ_TBR_IS_ENABLED(ifq)) {
3206 struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
3207 ifq->ifcq_tbr.tbr_percent, 0 };
3208 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
3209 }
3210
3211 ifclassq_update(ifq, ev);
3212 }
3213
3214 void
3215 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
3216 {
3217 switch (ev) {
3218 case CLASSQ_EV_LINK_BANDWIDTH:
3219 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
3220 ifp->if_poll_update++;
3221 }
3222 break;
3223
3224 default:
3225 break;
3226 }
3227 }
3228
3229 errno_t
3230 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3231 {
3232 struct ifclassq *ifq;
3233 u_int32_t omodel;
3234 errno_t err;
3235
3236 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
3237 return EINVAL;
3238 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3239 return ENXIO;
3240 }
3241
3242 ifq = &ifp->if_snd;
3243 IFCQ_LOCK(ifq);
3244 omodel = ifp->if_output_sched_model;
3245 ifp->if_output_sched_model = model;
3246 if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
3247 ifp->if_output_sched_model = omodel;
3248 }
3249 IFCQ_UNLOCK(ifq);
3250
3251 return err;
3252 }
3253
3254 errno_t
3255 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3256 {
3257 if (ifp == NULL) {
3258 return EINVAL;
3259 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3260 return ENXIO;
3261 }
3262
3263 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3264
3265 return 0;
3266 }
3267
3268 errno_t
3269 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3270 {
3271 if (ifp == NULL || maxqlen == NULL) {
3272 return EINVAL;
3273 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3274 return ENXIO;
3275 }
3276
3277 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3278
3279 return 0;
3280 }
3281
3282 errno_t
3283 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
3284 {
3285 errno_t err;
3286
3287 if (ifp == NULL || pkts == NULL) {
3288 err = EINVAL;
3289 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3290 err = ENXIO;
3291 } else {
3292 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3293 pkts, NULL);
3294 }
3295
3296 return err;
3297 }
3298
3299 errno_t
3300 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3301 u_int32_t *pkts, u_int32_t *bytes)
3302 {
3303 errno_t err;
3304
3305 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
3306 (pkts == NULL && bytes == NULL)) {
3307 err = EINVAL;
3308 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
3309 err = ENXIO;
3310 } else {
3311 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
3312 }
3313
3314 return err;
3315 }
3316
3317 errno_t
3318 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3319 {
3320 struct dlil_threading_info *inp;
3321
3322 if (ifp == NULL) {
3323 return EINVAL;
3324 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
3325 return ENXIO;
3326 }
3327
3328 if (maxqlen == 0) {
3329 maxqlen = if_rcvq_maxlen;
3330 } else if (maxqlen < IF_RCVQ_MINLEN) {
3331 maxqlen = IF_RCVQ_MINLEN;
3332 }
3333
3334 inp = ifp->if_inp;
3335 lck_mtx_lock(&inp->input_lck);
3336 qlimit(&inp->rcvq_pkts) = maxqlen;
3337 lck_mtx_unlock(&inp->input_lck);
3338
3339 return 0;
3340 }
3341
3342 errno_t
3343 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3344 {
3345 struct dlil_threading_info *inp;
3346
3347 if (ifp == NULL || maxqlen == NULL) {
3348 return EINVAL;
3349 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
3350 return ENXIO;
3351 }
3352
3353 inp = ifp->if_inp;
3354 lck_mtx_lock(&inp->input_lck);
3355 *maxqlen = qlimit(&inp->rcvq_pkts);
3356 lck_mtx_unlock(&inp->input_lck);
3357 return 0;
3358 }
3359
3360 void
3361 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
3362 uint16_t delay_timeout)
3363 {
3364 if (delay_qlen > 0 && delay_timeout > 0) {
3365 ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
3366 ifp->if_start_delay_qlen = min(100, delay_qlen);
3367 ifp->if_start_delay_timeout = min(20000, delay_timeout);
3368 /* convert timeout to nanoseconds */
3369 ifp->if_start_delay_timeout *= 1000;
3370 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
3371 ifp->if_xname, (uint32_t)delay_qlen,
3372 (uint32_t)delay_timeout);
3373 } else {
3374 ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
3375 }
3376 }
3377
3378 static inline errno_t
3379 ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype,
3380 boolean_t flush, boolean_t *pdrop)
3381 {
3382 volatile uint64_t *fg_ts = NULL;
3383 volatile uint64_t *rt_ts = NULL;
3384 struct mbuf *m = p;
3385 struct timespec now;
3386 u_int64_t now_nsec = 0;
3387 int error = 0;
3388
3389 ASSERT(ifp->if_eflags & IFEF_TXSTART);
3390
3391 /*
3392 * If the packet already carries a timestamp, either from dlil_output()
3393 * or from the flowswitch, use it here. Otherwise, record a timestamp.
3394 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3395 * the timestamp value is used internally there.
3396 */
3397 switch (ptype) {
3398 case QP_MBUF:
3399 ASSERT(m->m_flags & M_PKTHDR);
3400 ASSERT(m->m_nextpkt == NULL);
3401
3402 if (!(m->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
3403 m->m_pkthdr.pkt_timestamp == 0) {
3404 nanouptime(&now);
3405 net_timernsec(&now, &now_nsec);
3406 m->m_pkthdr.pkt_timestamp = now_nsec;
3407 }
3408 m->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
3409 /*
3410 * If the packet service class is not background,
3411 * update the timestamp to indicate recent activity
3412 * on a foreground socket.
3413 */
3414 if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
3415 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3416 if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) {
3417 ifp->if_fg_sendts = _net_uptime;
3418 if (fg_ts != NULL) {
3419 *fg_ts = _net_uptime;
3420 }
3421 }
3422 if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
3423 ifp->if_rt_sendts = _net_uptime;
3424 if (rt_ts != NULL) {
3425 *rt_ts = _net_uptime;
3426 }
3427 }
3428 }
3429 break;
3430
3431
3432 default:
3433 VERIFY(0);
3434 /* NOTREACHED */
3435 }
3436
3437 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
3438 if (now_nsec == 0) {
3439 nanouptime(&now);
3440 net_timernsec(&now, &now_nsec);
3441 }
3442 /*
3443 * If the driver chose to delay the start callback for
3444 * coalescing multiple packets, then use the following
3445 * heuristics to make sure that the start callback will
3446 * be delayed only when a bulk data transfer is detected:
3447 * 1. The number of packets enqueued in (delay_win * 2) is
3448 * greater than or equal to the delay qlen.
3449 * 2. If delay_start is enabled it will stay enabled for
3450 * another 10 idle windows. This is to take into account
3451 * variable RTT and burst traffic.
3452 * 3. If the time elapsed since the last enqueue is more
3453 * than 200ms, we disable delaying the start callback. This
3454 * is to take idle time into account.
3455 */
3456 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3457 if (ifp->if_start_delay_swin > 0) {
3458 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3459 ifp->if_start_delay_cnt++;
3460 } else if ((now_nsec - ifp->if_start_delay_swin)
3461 >= (200 * 1000 * 1000)) {
3462 ifp->if_start_delay_swin = now_nsec;
3463 ifp->if_start_delay_cnt = 1;
3464 ifp->if_start_delay_idle = 0;
3465 if (ifp->if_eflags & IFEF_DELAY_START) {
3466 ifp->if_eflags &=
3467 ~(IFEF_DELAY_START);
3468 ifnet_delay_start_disabled++;
3469 }
3470 } else {
3471 if (ifp->if_start_delay_cnt >=
3472 ifp->if_start_delay_qlen) {
3473 ifp->if_eflags |= IFEF_DELAY_START;
3474 ifp->if_start_delay_idle = 0;
3475 } else {
3476 if (ifp->if_start_delay_idle >= 10) {
3477 ifp->if_eflags &= ~(IFEF_DELAY_START);
3478 ifnet_delay_start_disabled++;
3479 } else {
3480 ifp->if_start_delay_idle++;
3481 }
3482 }
3483 ifp->if_start_delay_swin = now_nsec;
3484 ifp->if_start_delay_cnt = 1;
3485 }
3486 } else {
3487 ifp->if_start_delay_swin = now_nsec;
3488 ifp->if_start_delay_cnt = 1;
3489 ifp->if_start_delay_idle = 0;
3490 ifp->if_eflags &= ~(IFEF_DELAY_START);
3491 }
3492 } else {
3493 ifp->if_eflags &= ~(IFEF_DELAY_START);
3494 }
3495
3496 switch (ptype) {
3497 case QP_MBUF:
3498 /* enqueue the packet (caller consumes object) */
3499 error = ifclassq_enqueue(&ifp->if_snd, m, QP_MBUF, pdrop);
3500 m = NULL;
3501 break;
3502
3503
3504 default:
3505 break;
3506 }
3507
3508 /*
3509 * Tell the driver to start dequeueing; do this even when the queue
3510 * for the packet is suspended (EQSUSPENDED), as the driver could still
3511 * be dequeueing from other unsuspended queues.
3512 */
3513 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3514 ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
3515 ifnet_start(ifp);
3516 }
3517
3518 return error;
3519 }
3520
3521 errno_t
3522 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3523 {
3524 boolean_t pdrop;
3525 return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
3526 }
3527
3528 errno_t
3529 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3530 boolean_t *pdrop)
3531 {
3532 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3533 m->m_nextpkt != NULL) {
3534 if (m != NULL) {
3535 m_freem_list(m);
3536 *pdrop = TRUE;
3537 }
3538 return EINVAL;
3539 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3540 !IF_FULLY_ATTACHED(ifp)) {
3541 /* flag tested without lock for performance */
3542 m_freem(m);
3543 *pdrop = TRUE;
3544 return ENXIO;
3545 } else if (!(ifp->if_flags & IFF_UP)) {
3546 m_freem(m);
3547 *pdrop = TRUE;
3548 return ENETDOWN;
3549 }
3550
3551 return ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop);
3552 }
3553
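/*
 * Usage sketch (hypothetical caller, not dlil code): a minimal driver-side
 * send path built only on the ifnet_enqueue() KPI defined above.  The
 * function name is a placeholder, and it assumes the caller holds a fully
 * attached ifp with IFEF_TXSTART set.  ifnet_enqueue() consumes the mbuf
 * on both success and failure, so the caller must not touch it afterwards.
 */
#if 0
static errno_t
example_driver_send(struct ifnet *ifp, struct mbuf *m)
{
	errno_t err;

	/* hand the packet to the interface's output/AQM queue */
	err = ifnet_enqueue(ifp, m);

	/*
	 * EQFULL/EQSUSPENDED are surfaced as flow-advisory feedback by the
	 * upper layers; a simple caller can treat them as success here.
	 */
	if (err == EQFULL || err == EQSUSPENDED) {
		err = 0;
	}
	return err;
}
#endif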
3554
3555 errno_t
3556 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3557 {
3558 errno_t rc;
3559 classq_pkt_type_t ptype;
3560 if (ifp == NULL || mp == NULL) {
3561 return EINVAL;
3562 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3563 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3564 return ENXIO;
3565 }
3566 if (!ifnet_is_attached(ifp, 1)) {
3567 return ENXIO;
3568 }
3569
3570 rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3571 (void **)mp, NULL, NULL, NULL, &ptype);
3572 VERIFY((*mp == NULL) || (ptype == QP_MBUF));
3573 ifnet_decr_iorefcnt(ifp);
3574
3575 return rc;
3576 }
3577
3578 errno_t
3579 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3580 struct mbuf **mp)
3581 {
3582 errno_t rc;
3583 classq_pkt_type_t ptype;
3584 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
3585 return EINVAL;
3586 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3587 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3588 return ENXIO;
3589 }
3590 if (!ifnet_is_attached(ifp, 1)) {
3591 return ENXIO;
3592 }
3593
3594 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
3595 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)mp, NULL, NULL,
3596 NULL, &ptype);
3597 VERIFY((*mp == NULL) || (ptype == QP_MBUF));
3598 ifnet_decr_iorefcnt(ifp);
3599 return rc;
3600 }
3601
3602 errno_t
3603 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3604 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3605 {
3606 errno_t rc;
3607 classq_pkt_type_t ptype;
3608 if (ifp == NULL || head == NULL || pkt_limit < 1) {
3609 return EINVAL;
3610 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3611 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3612 return ENXIO;
3613 }
3614 if (!ifnet_is_attached(ifp, 1)) {
3615 return ENXIO;
3616 }
3617
3618 rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
3619 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, (void **)tail, cnt,
3620 len, &ptype);
3621 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3622 ifnet_decr_iorefcnt(ifp);
3623 return rc;
3624 }
3625
3626 errno_t
3627 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3628 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3629 {
3630 errno_t rc;
3631 classq_pkt_type_t ptype;
3632 if (ifp == NULL || head == NULL || byte_limit < 1) {
3633 return EINVAL;
3634 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3635 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3636 return ENXIO;
3637 }
3638 if (!ifnet_is_attached(ifp, 1)) {
3639 return ENXIO;
3640 }
3641
3642 rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3643 byte_limit, (void **)head, (void **)tail, cnt, len, &ptype);
3644 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3645 ifnet_decr_iorefcnt(ifp);
3646 return rc;
3647 }
3648
3649 errno_t
3650 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3651 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3652 u_int32_t *len)
3653 {
3654 errno_t rc;
3655 classq_pkt_type_t ptype;
3656 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
3657 !MBUF_VALID_SC(sc)) {
3658 return EINVAL;
3659 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3660 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3661 return ENXIO;
3662 }
3663 if (!ifnet_is_attached(ifp, 1)) {
3664 return ENXIO;
3665 }
3666
3667 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
3668 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head,
3669 (void **)tail, cnt, len, &ptype);
3670 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3671 ifnet_decr_iorefcnt(ifp);
3672 return rc;
3673 }
3674
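/*
 * Usage sketch (hypothetical driver, not dlil code): the dequeue KPIs above
 * are meant to be called from a driver's start callback on an interface
 * using the IFEF_TXSTART output model.  example_hw_transmit() is a
 * placeholder for the driver's hardware-transmit helper; the per-pass
 * packet limit of 32 is illustrative only.
 */
#if 0
static void example_hw_transmit(struct ifnet *, struct mbuf *,
    u_int32_t, u_int32_t);	/* placeholder */

static void
example_driver_start(struct ifnet *ifp)
{
	struct mbuf *head, *tail;
	u_int32_t cnt, len;

	/* pull up to 32 packets per pass until the output queue is drained */
	while (ifnet_dequeue_multi(ifp, 32, &head, &tail, &cnt, &len) == 0) {
		example_hw_transmit(ifp, head, cnt, len);
	}
}
#endif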
3675 #if !CONFIG_EMBEDDED
3676 errno_t
3677 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
3678 const struct sockaddr *dest, const char *dest_linkaddr,
3679 const char *frame_type, u_int32_t *pre, u_int32_t *post)
3680 {
3681 if (pre != NULL) {
3682 *pre = 0;
3683 }
3684 if (post != NULL) {
3685 *post = 0;
3686 }
3687
3688 return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
3689 }
3690 #endif /* !CONFIG_EMBEDDED */
3691
3692 static int
3693 dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
3694 char **frame_header_p, protocol_family_t protocol_family)
3695 {
3696 struct ifnet_filter *filter;
3697
3698 /*
3699 * Pass the inbound packet to the interface filters
3700 */
3701 lck_mtx_lock_spin(&ifp->if_flt_lock);
3702 /* prevent filter list from changing in case we drop the lock */
3703 if_flt_monitor_busy(ifp);
3704 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3705 int result;
3706
3707 if (!filter->filt_skip && filter->filt_input != NULL &&
3708 (filter->filt_protocol == 0 ||
3709 filter->filt_protocol == protocol_family)) {
3710 lck_mtx_unlock(&ifp->if_flt_lock);
3711
3712 result = (*filter->filt_input)(filter->filt_cookie,
3713 ifp, protocol_family, m_p, frame_header_p);
3714
3715 lck_mtx_lock_spin(&ifp->if_flt_lock);
3716 if (result != 0) {
3717 /* we're done with the filter list */
3718 if_flt_monitor_unbusy(ifp);
3719 lck_mtx_unlock(&ifp->if_flt_lock);
3720 return result;
3721 }
3722 }
3723 }
3724 /* we're done with the filter list */
3725 if_flt_monitor_unbusy(ifp);
3726 lck_mtx_unlock(&ifp->if_flt_lock);
3727
3728 /*
3729 * Strip away the M_PROTO1 bit prior to sending the packet up the stack,
3730 * as it is meant to be local to a subsystem (if_bridge for M_PROTO1).
3731 */
3732 if (*m_p != NULL) {
3733 (*m_p)->m_flags &= ~M_PROTO1;
3734 }
3735
3736 return 0;
3737 }
3738
3739 static int
3740 dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
3741 protocol_family_t protocol_family)
3742 {
3743 struct ifnet_filter *filter;
3744
3745 /*
3746 * Pass the outbound packet to the interface filters
3747 */
3748 lck_mtx_lock_spin(&ifp->if_flt_lock);
3749 /* prevent filter list from changing in case we drop the lock */
3750 if_flt_monitor_busy(ifp);
3751 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3752 int result;
3753
3754 if (!filter->filt_skip && filter->filt_output != NULL &&
3755 (filter->filt_protocol == 0 ||
3756 filter->filt_protocol == protocol_family)) {
3757 lck_mtx_unlock(&ifp->if_flt_lock);
3758
3759 result = filter->filt_output(filter->filt_cookie, ifp,
3760 protocol_family, m_p);
3761
3762 lck_mtx_lock_spin(&ifp->if_flt_lock);
3763 if (result != 0) {
3764 /* we're done with the filter list */
3765 if_flt_monitor_unbusy(ifp);
3766 lck_mtx_unlock(&ifp->if_flt_lock);
3767 return result;
3768 }
3769 }
3770 }
3771 /* we're done with the filter list */
3772 if_flt_monitor_unbusy(ifp);
3773 lck_mtx_unlock(&ifp->if_flt_lock);
3774
3775 return 0;
3776 }
3777
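/*
 * Callback sketch (hypothetical filter, not dlil code): the input and
 * output hooks invoked by the two routines above have the shapes shown
 * below.  Returning 0 lets the packet continue through dlil; returning
 * EJUSTRETURN tells dlil the filter took ownership of the packet; any
 * other non-zero value causes the caller to free the packet and stop
 * processing it.  The function names are placeholders.
 */
#if 0
static errno_t
example_filt_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, protocol, frame_ptr)
	/* e.g. inspect or rewrite *data / *frame_ptr here */
	return 0;	/* pass the packet along */
}

static errno_t
example_filt_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data)
{
#pragma unused(cookie, ifp, protocol, data)
	return 0;	/* pass the packet along */
}
#endif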
3778 static void
3779 dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
3780 {
3781 int error;
3782
3783 if (ifproto->proto_kpi == kProtoKPI_v1) {
3784 /* Version 1 protocols get one packet at a time */
3785 while (m != NULL) {
3786 char * frame_header;
3787 mbuf_t next_packet;
3788
3789 next_packet = m->m_nextpkt;
3790 m->m_nextpkt = NULL;
3791 frame_header = m->m_pkthdr.pkt_hdr;
3792 m->m_pkthdr.pkt_hdr = NULL;
3793 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
3794 ifproto->protocol_family, m, frame_header);
3795 if (error != 0 && error != EJUSTRETURN) {
3796 m_freem(m);
3797 }
3798 m = next_packet;
3799 }
3800 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
3801 /* Version 2 protocols support packet lists */
3802 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
3803 ifproto->protocol_family, m);
3804 if (error != 0 && error != EJUSTRETURN) {
3805 m_freem_list(m);
3806 }
3807 }
3808 }
3809
3810 static void
3811 dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
3812 struct dlil_threading_info *inp, boolean_t poll)
3813 {
3814 struct ifnet_stat_increment_param *d = &inp->stats;
3815
3816 if (s->packets_in != 0) {
3817 d->packets_in += s->packets_in;
3818 }
3819 if (s->bytes_in != 0) {
3820 d->bytes_in += s->bytes_in;
3821 }
3822 if (s->errors_in != 0) {
3823 d->errors_in += s->errors_in;
3824 }
3825
3826 if (s->packets_out != 0) {
3827 d->packets_out += s->packets_out;
3828 }
3829 if (s->bytes_out != 0) {
3830 d->bytes_out += s->bytes_out;
3831 }
3832 if (s->errors_out != 0) {
3833 d->errors_out += s->errors_out;
3834 }
3835
3836 if (s->collisions != 0) {
3837 d->collisions += s->collisions;
3838 }
3839 if (s->dropped != 0) {
3840 d->dropped += s->dropped;
3841 }
3842
3843 if (poll) {
3844 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
3845 }
3846 }
3847
3848 static void
3849 dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
3850 {
3851 struct ifnet_stat_increment_param *s = &inp->stats;
3852
3853 /*
3854 * Use of atomic operations is unavoidable here because
3855 * these stats may also be incremented elsewhere via KPIs.
3856 */
3857 if (s->packets_in != 0) {
3858 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
3859 s->packets_in = 0;
3860 }
3861 if (s->bytes_in != 0) {
3862 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
3863 s->bytes_in = 0;
3864 }
3865 if (s->errors_in != 0) {
3866 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
3867 s->errors_in = 0;
3868 }
3869
3870 if (s->packets_out != 0) {
3871 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
3872 s->packets_out = 0;
3873 }
3874 if (s->bytes_out != 0) {
3875 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
3876 s->bytes_out = 0;
3877 }
3878 if (s->errors_out != 0) {
3879 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
3880 s->errors_out = 0;
3881 }
3882
3883 if (s->collisions != 0) {
3884 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
3885 s->collisions = 0;
3886 }
3887 if (s->dropped != 0) {
3888 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
3889 s->dropped = 0;
3890 }
3891
3892 if (ifp->if_data_threshold != 0) {
3893 lck_mtx_convert_spin(&inp->input_lck);
3894 ifnet_notify_data_threshold(ifp);
3895 }
3896
3897 /*
3898 * No need for atomic operations as they are modified here
3899 * only from within the DLIL input thread context.
3900 */
3901 if (inp->tstats.packets != 0) {
3902 inp->pstats.ifi_poll_packets += inp->tstats.packets;
3903 inp->tstats.packets = 0;
3904 }
3905 if (inp->tstats.bytes != 0) {
3906 inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
3907 inp->tstats.bytes = 0;
3908 }
3909 }
3910
3911 __private_extern__ void
3912 dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
3913 {
3914 return dlil_input_packet_list_common(ifp, m, 0,
3915 IFNET_MODEL_INPUT_POLL_OFF, FALSE);
3916 }
3917
3918 __private_extern__ void
3919 dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
3920 u_int32_t cnt, ifnet_model_t mode)
3921 {
3922 return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
3923 }
3924
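/*
 * Driver-side sketch (hypothetical; assumes the ifnet_input() KPI from
 * net/kpi_interface.h): received packets normally enter dlil through
 * ifnet_input(), which funnels them to the input thread and eventually
 * into dlil_input_packet_list_common() below.  The function name and the
 * way the chain/byte counts are produced are illustrative only.
 */
#if 0
static void
example_driver_rx(ifnet_t ifp, mbuf_t pkt_chain, u_int32_t pkts,
    u_int32_t bytes)
{
	struct ifnet_stat_increment_param stats;

	bzero(&stats, sizeof(stats));
	stats.packets_in = pkts;
	stats.bytes_in = bytes;

	/* dlil consumes the chain, even on error */
	(void) ifnet_input(ifp, pkt_chain, &stats);
}
#endif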
3925 static void
3926 dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
3927 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
3928 {
3929 int error = 0;
3930 protocol_family_t protocol_family;
3931 mbuf_t next_packet;
3932 ifnet_t ifp = ifp_param;
3933 char *frame_header = NULL;
3934 struct if_proto *last_ifproto = NULL;
3935 mbuf_t pkt_first = NULL;
3936 mbuf_t *pkt_next = NULL;
3937 u_int32_t poll_thresh = 0, poll_ival = 0;
3938
3939 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
3940
3941 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
3942 (poll_ival = if_rxpoll_interval_pkts) > 0) {
3943 poll_thresh = cnt;
3944 }
3945
3946 while (m != NULL) {
3947 struct if_proto *ifproto = NULL;
3948 int iorefcnt = 0;
3949 uint32_t pktf_mask; /* pkt flags to preserve */
3950
3951 if (ifp_param == NULL) {
3952 ifp = m->m_pkthdr.rcvif;
3953 }
3954
3955 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
3956 poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
3957 ifnet_poll(ifp);
3958 }
3959
3960 /* Check if this mbuf looks valid */
3961 MBUF_INPUT_CHECK(m, ifp);
3962
3963 next_packet = m->m_nextpkt;
3964 m->m_nextpkt = NULL;
3965 frame_header = m->m_pkthdr.pkt_hdr;
3966 m->m_pkthdr.pkt_hdr = NULL;
3967
3968 /*
3969 * Get an IO reference count if the interface is not
3970 * loopback (lo0) and it is attached; lo0 never goes
3971 * away, so optimize for that.
3972 */
3973 if (ifp != lo_ifp) {
3974 if (!ifnet_is_attached(ifp, 1)) {
3975 m_freem(m);
3976 goto next;
3977 }
3978 iorefcnt = 1;
3979 /*
3980 * Preserve the time stamp if it was set.
3981 */
3982 pktf_mask = PKTF_TS_VALID;
3983 } else {
3984 /*
3985 * If this arrived on lo0, preserve interface addr
3986 * info to allow for connectivity between loopback
3987 * and local interface addresses.
3988 */
3989 pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
3990 }
3991
3992 /* make sure packet comes in clean */
3993 m_classifier_init(m, pktf_mask);
3994
3995 ifp_inc_traffic_class_in(ifp, m);
3996
3997 /* find which protocol family this packet is for */
3998 ifnet_lock_shared(ifp);
3999 error = (*ifp->if_demux)(ifp, m, frame_header,
4000 &protocol_family);
4001 ifnet_lock_done(ifp);
4002 if (error != 0) {
4003 if (error == EJUSTRETURN) {
4004 goto next;
4005 }
4006 protocol_family = 0;
4007 }
4008
4009 pktap_input(ifp, protocol_family, m, frame_header);
4010
4011 /* Drop v4 packets received on CLAT46 enabled interface */
4012 if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
4013 m_freem(m);
4014 ip6stat.ip6s_clat464_in_v4_drop++;
4015 goto next;
4016 }
4017
4018 /* Translate the packet if it is received on CLAT interface */
4019 if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
4020 && dlil_is_clat_needed(protocol_family, m)) {
4021 char *data = NULL;
4022 struct ether_header eh;
4023 struct ether_header *ehp = NULL;
4024
4025 if (ifp->if_type == IFT_ETHER) {
4026 ehp = (struct ether_header *)(void *)frame_header;
4027 /* Skip RX Ethernet packets if they are not IPv6 */
4028 if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
4029 goto skip_clat;
4030 }
4031
4032 /* Keep a copy of frame_header for Ethernet packets */
4033 bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
4034 }
4035 error = dlil_clat64(ifp, &protocol_family, &m);
4036 data = (char *) mbuf_data(m);
4037 if (error != 0) {
4038 m_freem(m);
4039 ip6stat.ip6s_clat464_in_drop++;
4040 goto next;
4041 }
4042 /* Native v6 should be a no-op */
4043 if (protocol_family != PF_INET) {
4044 goto skip_clat;
4045 }
4046
4047 /* Do this only for translated v4 packets. */
4048 switch (ifp->if_type) {
4049 case IFT_CELLULAR:
4050 frame_header = data;
4051 break;
4052 case IFT_ETHER:
4053 /*
4054 * Drop if the mbuf doesn't have enough
4055 * space for the Ethernet header.
4056 */
4057 if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
4058 m_free(m);
4059 ip6stat.ip6s_clat464_in_drop++;
4060 goto next;
4061 }
4062 /*
4063 * Set frame_header to point ETHER_HDR_LEN bytes
4064 * preceding the data pointer. Change
4065 * the ether_type too.
4066 */
4067 frame_header = data - ETHER_HDR_LEN;
4068 eh.ether_type = htons(ETHERTYPE_IP);
4069 bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
4070 break;
4071 }
4072 }
4073 skip_clat:
4074 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
4075 !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
4076 dlil_input_cksum_dbg(ifp, m, frame_header,
4077 protocol_family);
4078 }
4079
4080 /*
4081 * For partial checksum offload, we expect the driver to
4082 * set the start offset indicating the start of the span
4083 * that is covered by the hardware-computed checksum;
4084 * adjust this start offset accordingly because the data
4085 * pointer has been advanced beyond the link-layer header.
4086 *
4087 * Don't adjust if the interface is a bridge member, as
4088 * the adjustment will occur from the context of the
4089 * bridge interface during input.
4090 */
4091 if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
4092 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4093 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4094 int adj;
4095 if (frame_header == NULL ||
4096 frame_header < (char *)mbuf_datastart(m) ||
4097 frame_header > (char *)m->m_data ||
4098 (adj = (m->m_data - frame_header)) >
4099 m->m_pkthdr.csum_rx_start) {
4100 m->m_pkthdr.csum_data = 0;
4101 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
4102 hwcksum_in_invalidated++;
4103 } else {
4104 m->m_pkthdr.csum_rx_start -= adj;
4105 }
4106 }
4107
4108 if (clat_debug) {
4109 pktap_input(ifp, protocol_family, m, frame_header);
4110 }
4111
4112 if (m->m_flags & (M_BCAST | M_MCAST)) {
4113 atomic_add_64(&ifp->if_imcasts, 1);
4114 }
4115
4116 /* run interface filters, exclude VLAN packets PR-3586856 */
4117 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
4118 error = dlil_interface_filters_input(ifp, &m,
4119 &frame_header, protocol_family);
4120 if (error != 0) {
4121 if (error != EJUSTRETURN) {
4122 m_freem(m);
4123 }
4124 goto next;
4125 }
4126 }
4127 if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
4128 m_freem(m);
4129 goto next;
4130 }
4131
4132 /* Lookup the protocol attachment to this interface */
4133 if (protocol_family == 0) {
4134 ifproto = NULL;
4135 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
4136 (last_ifproto->protocol_family == protocol_family)) {
4137 VERIFY(ifproto == NULL);
4138 ifproto = last_ifproto;
4139 if_proto_ref(last_ifproto);
4140 } else {
4141 VERIFY(ifproto == NULL);
4142 ifnet_lock_shared(ifp);
4143 /* callee holds a proto refcnt upon success */
4144 ifproto = find_attached_proto(ifp, protocol_family);
4145 ifnet_lock_done(ifp);
4146 }
4147 if (ifproto == NULL) {
4148 /* no protocol for this packet, discard */
4149 m_freem(m);
4150 goto next;
4151 }
4152 if (ifproto != last_ifproto) {
4153 if (last_ifproto != NULL) {
4154 /* pass up the list for the previous protocol */
4155 dlil_ifproto_input(last_ifproto, pkt_first);
4156 pkt_first = NULL;
4157 if_proto_free(last_ifproto);
4158 }
4159 last_ifproto = ifproto;
4160 if_proto_ref(ifproto);
4161 }
4162 /* extend the list */
4163 m->m_pkthdr.pkt_hdr = frame_header;
4164 if (pkt_first == NULL) {
4165 pkt_first = m;
4166 } else {
4167 *pkt_next = m;
4168 }
4169 pkt_next = &m->m_nextpkt;
4170
4171 next:
4172 if (next_packet == NULL && last_ifproto != NULL) {
4173 /* pass up the last list of packets */
4174 dlil_ifproto_input(last_ifproto, pkt_first);
4175 if_proto_free(last_ifproto);
4176 last_ifproto = NULL;
4177 }
4178 if (ifproto != NULL) {
4179 if_proto_free(ifproto);
4180 ifproto = NULL;
4181 }
4182
4183 m = next_packet;
4184
4185 /* update the driver's multicast filter, if needed */
4186 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
4187 ifp->if_updatemcasts = 0;
4188 }
4189 if (iorefcnt == 1) {
4190 ifnet_decr_iorefcnt(ifp);
4191 }
4192 }
4193
4194 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4195 }
4196
4197 errno_t
4198 if_mcasts_update(struct ifnet *ifp)
4199 {
4200 errno_t err;
4201
4202 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
4203 if (err == EAFNOSUPPORT) {
4204 err = 0;
4205 }
4206 printf("%s: %s %d suspended link-layer multicast membership(s) "
4207 "(err=%d)\n", if_name(ifp),
4208 (err == 0 ? "successfully restored" : "failed to restore"),
4209 ifp->if_updatemcasts, err);
4210
4211 /* just return success */
4212 return 0;
4213 }
4214
4215 /* If ifp is set, we will increment the generation for the interface */
4216 int
4217 dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
4218 {
4219 if (ifp != NULL) {
4220 ifnet_increment_generation(ifp);
4221 }
4222
4223 #if NECP
4224 necp_update_all_clients();
4225 #endif /* NECP */
4226
4227 return kev_post_msg(event);
4228 }
4229
4230 __private_extern__ void
4231 dlil_post_sifflags_msg(struct ifnet * ifp)
4232 {
4233 struct kev_msg ev_msg;
4234 struct net_event_data ev_data;
4235
4236 bzero(&ev_data, sizeof(ev_data));
4237 bzero(&ev_msg, sizeof(ev_msg));
4238 ev_msg.vendor_code = KEV_VENDOR_APPLE;
4239 ev_msg.kev_class = KEV_NETWORK_CLASS;
4240 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
4241 ev_msg.event_code = KEV_DL_SIFFLAGS;
4242 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
4243 ev_data.if_family = ifp->if_family;
4244 ev_data.if_unit = (u_int32_t) ifp->if_unit;
4245 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
4246 ev_msg.dv[0].data_ptr = &ev_data;
4247 ev_msg.dv[1].data_length = 0;
4248 dlil_post_complete_msg(ifp, &ev_msg);
4249 }
4250
4251 #define TMP_IF_PROTO_ARR_SIZE 10
4252 static int
4253 dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
4254 {
4255 struct ifnet_filter *filter = NULL;
4256 struct if_proto *proto = NULL;
4257 int if_proto_count = 0;
4258 struct if_proto **tmp_ifproto_arr = NULL;
4259 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4260 int tmp_ifproto_arr_idx = 0;
4261 bool tmp_malloc = false;
4262
4263 /*
4264 * Pass the event to the interface filters
4265 */
4266 lck_mtx_lock_spin(&ifp->if_flt_lock);
4267 /* prevent filter list from changing in case we drop the lock */
4268 if_flt_monitor_busy(ifp);
4269 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4270 if (filter->filt_event != NULL) {
4271 lck_mtx_unlock(&ifp->if_flt_lock);
4272
4273 filter->filt_event(filter->filt_cookie, ifp,
4274 filter->filt_protocol, event);
4275
4276 lck_mtx_lock_spin(&ifp->if_flt_lock);
4277 }
4278 }
4279 /* we're done with the filter list */
4280 if_flt_monitor_unbusy(ifp);
4281 lck_mtx_unlock(&ifp->if_flt_lock);
4282
4283 /* Get an io ref count if the interface is attached */
4284 if (!ifnet_is_attached(ifp, 1)) {
4285 goto done;
4286 }
4287
4288 /*
4289 * An embedded tmp_list_entry in if_proto may still get
4290 * overwritten by another thread after giving up the ifnet lock,
4291 * therefore we are avoiding embedded pointers here.
4292 */
4293 ifnet_lock_shared(ifp);
4294 if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
4295 if (if_proto_count) {
4296 int i;
4297 VERIFY(ifp->if_proto_hash != NULL);
4298 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
4299 tmp_ifproto_arr = tmp_ifproto_stack_arr;
4300 } else {
4301 MALLOC(tmp_ifproto_arr, struct if_proto **,
4302 sizeof(*tmp_ifproto_arr) * if_proto_count,
4303 M_TEMP, M_ZERO);
4304 if (tmp_ifproto_arr == NULL) {
4305 ifnet_lock_done(ifp);
4306 goto cleanup;
4307 }
4308 tmp_malloc = true;
4309 }
4310
4311 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
4312 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
4313 next_hash) {
4314 if_proto_ref(proto);
4315 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
4316 tmp_ifproto_arr_idx++;
4317 }
4318 }
4319 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
4320 }
4321 ifnet_lock_done(ifp);
4322
4323 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
4324 tmp_ifproto_arr_idx++) {
4325 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
4326 VERIFY(proto != NULL);
4327 proto_media_event eventp =
4328 (proto->proto_kpi == kProtoKPI_v1 ?
4329 proto->kpi.v1.event :
4330 proto->kpi.v2.event);
4331
4332 if (eventp != NULL) {
4333 eventp(ifp, proto->protocol_family,
4334 event);
4335 }
4336 if_proto_free(proto);
4337 }
4338
4339 cleanup:
4340 if (tmp_malloc) {
4341 FREE(tmp_ifproto_arr, M_TEMP);
4342 }
4343
4344 /* Pass the event to the interface */
4345 if (ifp->if_event != NULL) {
4346 ifp->if_event(ifp, event);
4347 }
4348
4349 /* Release the io ref count */
4350 ifnet_decr_iorefcnt(ifp);
4351 done:
4352 return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
4353 }
4354
4355 errno_t
4356 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
4357 {
4358 struct kev_msg kev_msg;
4359 int result = 0;
4360
4361 if (ifp == NULL || event == NULL) {
4362 return EINVAL;
4363 }
4364
4365 bzero(&kev_msg, sizeof(kev_msg));
4366 kev_msg.vendor_code = event->vendor_code;
4367 kev_msg.kev_class = event->kev_class;
4368 kev_msg.kev_subclass = event->kev_subclass;
4369 kev_msg.event_code = event->event_code;
4370 kev_msg.dv[0].data_ptr = &event->event_data[0];
4371 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
4372 kev_msg.dv[1].data_length = 0;
4373
4374 result = dlil_event_internal(ifp, &kev_msg, TRUE);
4375
4376 return result;
4377 }
4378
4379 #if CONFIG_MACF_NET
4380 #include <netinet/ip6.h>
4381 #include <netinet/ip.h>
4382 static int
4383 dlil_get_socket_type(struct mbuf **mp, int family, int raw)
4384 {
4385 struct mbuf *m;
4386 struct ip *ip;
4387 struct ip6_hdr *ip6;
4388 int type = SOCK_RAW;
4389
4390 if (!raw) {
4391 switch (family) {
4392 case PF_INET:
4393 m = m_pullup(*mp, sizeof(struct ip));
4394 if (m == NULL) {
4395 break;
4396 }
4397 *mp = m;
4398 ip = mtod(m, struct ip *);
4399 if (ip->ip_p == IPPROTO_TCP) {
4400 type = SOCK_STREAM;
4401 } else if (ip->ip_p == IPPROTO_UDP) {
4402 type = SOCK_DGRAM;
4403 }
4404 break;
4405 case PF_INET6:
4406 m = m_pullup(*mp, sizeof(struct ip6_hdr));
4407 if (m == NULL) {
4408 break;
4409 }
4410 *mp = m;
4411 ip6 = mtod(m, struct ip6_hdr *);
4412 if (ip6->ip6_nxt == IPPROTO_TCP) {
4413 type = SOCK_STREAM;
4414 } else if (ip6->ip6_nxt == IPPROTO_UDP) {
4415 type = SOCK_DGRAM;
4416 }
4417 break;
4418 }
4419 }
4420
4421 return type;
4422 }
4423 #endif
4424
4425 static void
4426 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
4427 {
4428 mbuf_t n = m;
4429 int chainlen = 0;
4430
4431 while (n != NULL) {
4432 chainlen++;
4433 n = n->m_next;
4434 }
4435 switch (chainlen) {
4436 case 0:
4437 break;
4438 case 1:
4439 atomic_add_64(&cls->cls_one, 1);
4440 break;
4441 case 2:
4442 atomic_add_64(&cls->cls_two, 1);
4443 break;
4444 case 3:
4445 atomic_add_64(&cls->cls_three, 1);
4446 break;
4447 case 4:
4448 atomic_add_64(&cls->cls_four, 1);
4449 break;
4450 case 5:
4451 default:
4452 atomic_add_64(&cls->cls_five_or_more, 1);
4453 break;
4454 }
4455 }
4456
4457 /*
4458 * dlil_output
4459 *
4460 * Caller should have a lock on the protocol domain if the protocol
4461 * doesn't support finer grained locking. In most cases, the lock
4462 * will be held from the socket layer and won't be released until
4463 * we return back to the socket layer.
4464 *
4465 * This does mean that we must take a protocol lock before we take
4466 * an interface lock if we're going to take both. This makes sense
4467 * because a protocol is likely to interact with an ifp while it
4468 * is under the protocol lock.
4469 *
4470 * An advisory code will be returned if adv is not null. This
4471 * can be used to provide feedback about interface queues to the
4472 * application.
4473 */
4474 errno_t
4475 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
4476 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
4477 {
4478 char *frame_type = NULL;
4479 char *dst_linkaddr = NULL;
4480 int retval = 0;
4481 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
4482 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
4483 struct if_proto *proto = NULL;
4484 mbuf_t m = NULL;
4485 mbuf_t send_head = NULL;
4486 mbuf_t *send_tail = &send_head;
4487 int iorefcnt = 0;
4488 u_int32_t pre = 0, post = 0;
4489 u_int32_t fpkts = 0, fbytes = 0;
4490 int32_t flen = 0;
4491 struct timespec now;
4492 u_int64_t now_nsec;
4493 boolean_t did_clat46 = FALSE;
4494 protocol_family_t old_proto_family = proto_family;
4495 struct rtentry *rt = NULL;
4496
4497 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4498
4499 /*
4500 * Get an io refcnt if the interface is attached to prevent ifnet_detach
4501 * from happening while this operation is in progress
4502 */
4503 if (!ifnet_is_attached(ifp, 1)) {
4504 retval = ENXIO;
4505 goto cleanup;
4506 }
4507 iorefcnt = 1;
4508
4509 VERIFY(ifp->if_output_dlil != NULL);
4510
4511 /* update the driver's multicast filter, if needed */
4512 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
4513 ifp->if_updatemcasts = 0;
4514 }
4515
4516 frame_type = frame_type_buffer;
4517 dst_linkaddr = dst_linkaddr_buffer;
4518
4519 if (raw == 0) {
4520 ifnet_lock_shared(ifp);
4521 /* callee holds a proto refcnt upon success */
4522 proto = find_attached_proto(ifp, proto_family);
4523 if (proto == NULL) {
4524 ifnet_lock_done(ifp);
4525 retval = ENXIO;
4526 goto cleanup;
4527 }
4528 ifnet_lock_done(ifp);
4529 }
4530
4531 preout_again:
4532 if (packetlist == NULL) {
4533 goto cleanup;
4534 }
4535
4536 m = packetlist;
4537 packetlist = packetlist->m_nextpkt;
4538 m->m_nextpkt = NULL;
4539
4540 /*
4541 * Perform address family translation for the first
4542 * packet outside the loop so that address lookup can be
4543 * done for the translated proto family.
4544 */
4545 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
4546 (ifp->if_type == IFT_CELLULAR ||
4547 dlil_is_clat_needed(proto_family, m))) {
4548 retval = dlil_clat46(ifp, &proto_family, &m);
4549 /*
4550 * Go to the next packet if translation fails
4551 */
4552 if (retval != 0) {
4553 m_freem(m);
4554 m = NULL;
4555 ip6stat.ip6s_clat464_out_drop++;
4556 /* Make sure that the proto family is PF_INET */
4557 ASSERT(proto_family == PF_INET);
4558 goto preout_again;
4559 }
4560 /*
4561 * Free the old proto and make it point to the IPv6 proto structure.
4562 *
4563 * Change proto the first time we have successfully
4564 * performed address family translation.
4565 */
4566 if (!did_clat46 && proto_family == PF_INET6) {
4567 struct sockaddr_in6 dest6;
4568 did_clat46 = TRUE;
4569
4570 if (proto != NULL) {
4571 if_proto_free(proto);
4572 }
4573 ifnet_lock_shared(ifp);
4574 /* callee holds a proto refcnt upon success */
4575 proto = find_attached_proto(ifp, proto_family);
4576 if (proto == NULL) {
4577 ifnet_lock_done(ifp);
4578 retval = ENXIO;
4579 m_freem(m);
4580 m = NULL;
4581 goto cleanup;
4582 }
4583 ifnet_lock_done(ifp);
4584 if (ifp->if_type == IFT_ETHER) {
4585 /* Update the dest to translated v6 address */
4586 dest6.sin6_len = sizeof(struct sockaddr_in6);
4587 dest6.sin6_family = AF_INET6;
4588 dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
4589 dest = (const struct sockaddr *)&dest6;
4590
4591 /*
4592 * Look up the route to the translated destination.
4593 * Free this route ref during cleanup.
4594 */
4595 rt = rtalloc1_scoped((struct sockaddr *)&dest6,
4596 0, 0, ifp->if_index);
4597
4598 route = rt;
4599 }
4600 }
4601 }
4602
4603 /*
4604 * This path handles a packet chain going to the same destination.
4605 * The pre-output routine is used to either trigger resolution of
4606 * the next hop or retrieve the next hop's link-layer addressing.
4607 * For example: the ether_inet(6)_pre_output routine.
4608 *
4609 * If the routine returns EJUSTRETURN, it implies that the packet has
4610 * been queued, and therefore we have to call preout_again for the
4611 * following packet in the chain.
4612 *
4613 * For errors other than EJUSTRETURN, the current packet is freed
4614 * and the rest of the chain (pointed to by packetlist) is freed as
4615 * part of cleanup.
4616 *
4617 * If there is no error, the retrieved information is used for
4618 * all the packets in the chain.
4619 */
4620 if (raw == 0) {
4621 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
4622 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
4623 retval = 0;
4624 if (preoutp != NULL) {
4625 retval = preoutp(ifp, proto_family, &m, dest, route,
4626 frame_type, dst_linkaddr);
4627
4628 if (retval != 0) {
4629 if (retval == EJUSTRETURN) {
4630 goto preout_again;
4631 }
4632 m_freem(m);
4633 m = NULL;
4634 goto cleanup;
4635 }
4636 }
4637 }
4638
4639 #if CONFIG_MACF_NET
4640 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
4641 dlil_get_socket_type(&m, proto_family, raw));
4642 if (retval != 0) {
4643 m_freem(m);
4644 goto cleanup;
4645 }
4646 #endif
4647
4648 do {
4649 /*
4650 * Perform address family translation if needed.
4651 * For now we only support stateless 4 to 6 translation
4652 * on the out path.
4653 *
4654 * The routine below translates the IP header, updates the protocol
4655 * checksum and also translates ICMP.
4656 *
4657 * We skip the first packet as it is already translated and
4658 * the proto family is set to PF_INET6.
4659 */
4660 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
4661 (ifp->if_type == IFT_CELLULAR ||
4662 dlil_is_clat_needed(proto_family, m))) {
4663 retval = dlil_clat46(ifp, &proto_family, &m);
4664 /* Goto the next packet if the translation fails */
4665 if (retval != 0) {
4666 m_freem(m);
4667 m = NULL;
4668 ip6stat.ip6s_clat464_out_drop++;
4669 goto next;
4670 }
4671 }
4672
4673 #if CONFIG_DTRACE
4674 if (!raw && proto_family == PF_INET) {
4675 struct ip *ip = mtod(m, struct ip *);
4676 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4677 struct ip *, ip, struct ifnet *, ifp,
4678 struct ip *, ip, struct ip6_hdr *, NULL);
4679 } else if (!raw && proto_family == PF_INET6) {
4680 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
4681 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4682 struct ip6_hdr *, ip6, struct ifnet *, ifp,
4683 struct ip *, NULL, struct ip6_hdr *, ip6);
4684 }
4685 #endif /* CONFIG_DTRACE */
4686
4687 if (raw == 0 && ifp->if_framer != NULL) {
4688 int rcvif_set = 0;
4689
4690 /*
4691 * If this is a broadcast packet that needs to be
4692 * looped back into the system, set the inbound ifp
4693 * to that of the outbound ifp. This will allow
4694 * us to determine that it is a legitimate packet
4695 * for the system. Only set the ifp if it's not
4696 * already set, just to be safe.
4697 */
4698 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
4699 m->m_pkthdr.rcvif == NULL) {
4700 m->m_pkthdr.rcvif = ifp;
4701 rcvif_set = 1;
4702 }
4703
4704 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
4705 frame_type, &pre, &post);
4706 if (retval != 0) {
4707 if (retval != EJUSTRETURN) {
4708 m_freem(m);
4709 }
4710 goto next;
4711 }
4712
4713 /*
4714 * For partial checksum offload, adjust the start
4715 * and stuff offsets based on the prepended header.
4716 */
4717 if ((m->m_pkthdr.csum_flags &
4718 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4719 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4720 m->m_pkthdr.csum_tx_stuff += pre;
4721 m->m_pkthdr.csum_tx_start += pre;
4722 }
4723
4724 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
4725 dlil_output_cksum_dbg(ifp, m, pre,
4726 proto_family);
4727 }
4728
4729 /*
4730 * Clear the ifp if it was set above, and to be
4731 * safe, only if it is still the same as the
4732 * outbound ifp we have in context. If it was
4733 * looped back, then a copy of it was sent to the
4734 * loopback interface with the rcvif set, and we
4735 * are clearing the one that will go down to the
4736 * layer below.
4737 */
4738 if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
4739 m->m_pkthdr.rcvif = NULL;
4740 }
4741 }
4742
4743 /*
4744 * Let interface filters (if any) do their thing ...
4745 */
4746 /* Do not pass VLAN tagged packets to filters PR-3586856 */
4747 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
4748 retval = dlil_interface_filters_output(ifp,
4749 &m, proto_family);
4750 if (retval != 0) {
4751 if (retval != EJUSTRETURN) {
4752 m_freem(m);
4753 }
4754 goto next;
4755 }
4756 }
4757 /*
4758 * Strip away the M_PROTO1 bit prior to sending the packet
4759 * to the driver, as this field may be used by the driver.
4760 */
4761 m->m_flags &= ~M_PROTO1;
4762
4763 /*
4764 * If the underlying interface is not capable of handling a
4765 * packet whose data portion spans across physically disjoint
4766 * pages, we need to "normalize" the packet so that we pass
4767 * down a chain of mbufs where each mbuf points to a span that
4768 * resides within a system page boundary. If the packet does
4769 * not cross a page boundary, the following is a no-op.
4770 */
4771 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
4772 if ((m = m_normalize(m)) == NULL) {
4773 goto next;
4774 }
4775 }
4776
4777 /*
4778 * If this is a TSO packet, make sure the interface still
4779 * advertises TSO capability.
4780 */
4781 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
4782 retval = EMSGSIZE;
4783 m_freem(m);
4784 goto cleanup;
4785 }
4786
4787 ifp_inc_traffic_class_out(ifp, m);
4788 pktap_output(ifp, proto_family, m, pre, post);
4789
4790 /*
4791 * Count the number of elements in the mbuf chain
4792 */
4793 if (tx_chain_len_count) {
4794 dlil_count_chain_len(m, &tx_chain_len_stats);
4795 }
4796
4797 /*
4798 * Record timestamp; ifnet_enqueue() will use this info
4799 * rather than redoing the work. An optimization could
4800 * involve doing this just once at the top, if there are
4801 * no interface filters attached, but that's probably
4802 * not a big deal.
4803 */
4804 nanouptime(&now);
4805 net_timernsec(&now, &now_nsec);
4806 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
4807
4808 /*
4809 * Discard partial sum information if this packet originated
4810 * from another interface; the packet would already have the
4811 * final checksum and we shouldn't recompute it.
4812 */
4813 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
4814 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4815 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4816 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4817 m->m_pkthdr.csum_data = 0;
4818 }
4819
4820 /*
4821 * Finally, call the driver.
4822 */
4823 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
4824 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4825 flen += (m_pktlen(m) - (pre + post));
4826 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4827 }
4828 *send_tail = m;
4829 send_tail = &m->m_nextpkt;
4830 } else {
4831 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4832 flen = (m_pktlen(m) - (pre + post));
4833 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4834 } else {
4835 flen = 0;
4836 }
4837 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4838 0, 0, 0, 0, 0);
4839 retval = (*ifp->if_output_dlil)(ifp, m);
4840 if (retval == EQFULL || retval == EQSUSPENDED) {
4841 if (adv != NULL && adv->code == FADV_SUCCESS) {
4842 adv->code = (retval == EQFULL ?
4843 FADV_FLOW_CONTROLLED :
4844 FADV_SUSPENDED);
4845 }
4846 retval = 0;
4847 }
4848 if (retval == 0 && flen > 0) {
4849 fbytes += flen;
4850 fpkts++;
4851 }
4852 if (retval != 0 && dlil_verbose) {
4853 printf("%s: output error on %s retval = %d\n",
4854 __func__, if_name(ifp),
4855 retval);
4856 }
4857 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
4858 0, 0, 0, 0, 0);
4859 }
4860 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4861
4862 next:
4863 m = packetlist;
4864 if (m != NULL) {
4865 packetlist = packetlist->m_nextpkt;
4866 m->m_nextpkt = NULL;
4867 }
4868 /* Reset the proto family to old proto family for CLAT */
4869 if (did_clat46) {
4870 proto_family = old_proto_family;
4871 }
4872 } while (m != NULL);
4873
4874 if (send_head != NULL) {
4875 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4876 0, 0, 0, 0, 0);
4877 if (ifp->if_eflags & IFEF_SENDLIST) {
4878 retval = (*ifp->if_output_dlil)(ifp, send_head);
4879 if (retval == EQFULL || retval == EQSUSPENDED) {
4880 if (adv != NULL) {
4881 adv->code = (retval == EQFULL ?
4882 FADV_FLOW_CONTROLLED :
4883 FADV_SUSPENDED);
4884 }
4885 retval = 0;
4886 }
4887 if (retval == 0 && flen > 0) {
4888 fbytes += flen;
4889 fpkts++;
4890 }
4891 if (retval != 0 && dlil_verbose) {
4892 printf("%s: output error on %s retval = %d\n",
4893 __func__, if_name(ifp), retval);
4894 }
4895 } else {
4896 struct mbuf *send_m;
4897 int enq_cnt = 0;
4898 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
4899 while (send_head != NULL) {
4900 send_m = send_head;
4901 send_head = send_m->m_nextpkt;
4902 send_m->m_nextpkt = NULL;
4903 retval = (*ifp->if_output_dlil)(ifp, send_m);
4904 if (retval == EQFULL || retval == EQSUSPENDED) {
4905 if (adv != NULL) {
4906 adv->code = (retval == EQFULL ?
4907 FADV_FLOW_CONTROLLED :
4908 FADV_SUSPENDED);
4909 }
4910 retval = 0;
4911 }
4912 if (retval == 0) {
4913 enq_cnt++;
4914 if (flen > 0) {
4915 fpkts++;
4916 }
4917 }
4918 if (retval != 0 && dlil_verbose) {
4919 printf("%s: output error on %s "
4920 "retval = %d\n",
4921 __func__, if_name(ifp), retval);
4922 }
4923 }
4924 if (enq_cnt > 0) {
4925 fbytes += flen;
4926 ifnet_start(ifp);
4927 }
4928 }
4929 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4930 }
4931
4932 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4933
4934 cleanup:
4935 if (fbytes > 0) {
4936 ifp->if_fbytes += fbytes;
4937 }
4938 if (fpkts > 0) {
4939 ifp->if_fpackets += fpkts;
4940 }
4941 if (proto != NULL) {
4942 if_proto_free(proto);
4943 }
4944 if (packetlist) { /* if any packets are left, clean up */
4945 mbuf_freem_list(packetlist);
4946 }
4947 if (retval == EJUSTRETURN) {
4948 retval = 0;
4949 }
4950 if (iorefcnt == 1) {
4951 ifnet_decr_iorefcnt(ifp);
4952 }
4953 if (rt != NULL) {
4954 rtfree(rt);
4955 rt = NULL;
4956 }
4957
4958 return retval;
4959 }
4960
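/*
 * Caller-side sketch (hypothetical): how the flow advisory filled in by
 * dlil_output() above might be consumed.  The enclosing function and the
 * back-off action are placeholders; only dlil_output() and the FADV_*
 * codes come from this file.
 */
#if 0
static void
example_proto_send(ifnet_t ifp, protocol_family_t pf, mbuf_t chain,
    const struct sockaddr *dest)
{
	struct flowadv adv = { .code = FADV_SUCCESS };

	(void) dlil_output(ifp, pf, chain, NULL, dest, 0, &adv);
	if (adv.code == FADV_FLOW_CONTROLLED || adv.code == FADV_SUSPENDED) {
		/* back off: pause this flow until it is advised to resume */
	}
}
#endif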
4961 /*
4962 * This routine checks that the destination address is not a loopback,
4963 * link-local, multicast or broadcast address, i.e. that translation is needed.
4964 */
4965 static int
4966 dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
4967 {
4968 int ret = 0;
4969 switch (proto_family) {
4970 case PF_INET: {
4971 struct ip *iph = mtod(m, struct ip *);
4972 if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
4973 ret = 1;
4974 }
4975 break;
4976 }
4977 case PF_INET6: {
4978 struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
4979 if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
4980 CLAT64_NEEDED(&ip6h->ip6_dst)) {
4981 ret = 1;
4982 }
4983 break;
4984 }
4985 }
4986
4987 return ret;
4988 }
4989 /*
4990 * @brief This routine translates an IPv4 packet to an IPv6 packet,
4991 * updates the protocol checksum and also translates ICMP,
4992 * including its inner header.
4993 *
4994 * @param ifp Pointer to the interface
4995 * @param proto_family Pointer to the protocol family. It is updated if the
4996 * function performs the translation successfully.
4997 * @param m Pointer to the pointer to the packet. Needed because this
4998 * routine can end up changing the mbuf to a different one.
4999 *
5000 * @return 0 on success or else a negative value.
5001 */
5002 static errno_t
5003 dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5004 {
5005 VERIFY(*proto_family == PF_INET);
5006 VERIFY(IS_INTF_CLAT46(ifp));
5007
5008 pbuf_t pbuf_store, *pbuf = NULL;
5009 struct ip *iph = NULL;
5010 struct in_addr osrc, odst;
5011 uint8_t proto = 0;
5012 struct in6_ifaddr *ia6_clat_src = NULL;
5013 struct in6_addr *src = NULL;
5014 struct in6_addr dst;
5015 int error = 0;
5016 uint32_t off = 0;
5017 uint64_t tot_len = 0;
5018 uint16_t ip_id_val = 0;
5019 uint16_t ip_frag_off = 0;
5020
5021 boolean_t is_frag = FALSE;
5022 boolean_t is_first_frag = TRUE;
5023 boolean_t is_last_frag = TRUE;
5024
5025 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5026 pbuf = &pbuf_store;
5027 iph = pbuf->pb_data;
5028
5029 osrc = iph->ip_src;
5030 odst = iph->ip_dst;
5031 proto = iph->ip_p;
5032 off = iph->ip_hl << 2;
5033 ip_id_val = iph->ip_id;
5034 ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;
5035
5036 tot_len = ntohs(iph->ip_len);
5037
5038 /*
5039 * For packets that are not first fragments
5040 * we only need to adjust the checksum.
5041 * For 4-to-6 translation, the fragmentation header
5042 * gets appended after protocol translation.
5043 */
5044 if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
5045 is_frag = TRUE;
5046
5047 /* If the offset is not zero, it is not first frag */
5048 if (ip_frag_off != 0) {
5049 is_first_frag = FALSE;
5050 }
5051
5052 /* If IP_MF is set, then it is not last frag */
5053 if (ntohs(iph->ip_off) & IP_MF) {
5054 is_last_frag = FALSE;
5055 }
5056 }
5057
5058 /*
5059 * Retrieve the local IPv6 CLAT46 address reserved for stateless
5060 * translation.
5061 */
5062 ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5063 if (ia6_clat_src == NULL) {
5064 ip6stat.ip6s_clat464_out_nov6addr_drop++;
5065 error = -1;
5066 goto cleanup;
5067 }
5068
5069 src = &ia6_clat_src->ia_addr.sin6_addr;
5070
5071 /*
5072 * Translate IPv4 destination to IPv6 destination by using the
5073 * prefixes learned through prior PLAT discovery.
5074 */
5075 if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
5076 ip6stat.ip6s_clat464_out_v6synthfail_drop++;
5077 goto cleanup;
5078 }
5079
5080 /* Translate the IP header part first */
5081 error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
5082 iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;
5083
5084 iph = NULL; /* Invalidate iph as pbuf has been modified */
5085
5086 if (error != 0) {
5087 ip6stat.ip6s_clat464_out_46transfail_drop++;
5088 goto cleanup;
5089 }
5090
5091 /*
5092 * Translate protocol header, update checksum, checksum flags
5093 * and related fields.
5094 */
5095 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
5096 proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;
5097
5098 if (error != 0) {
5099 ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
5100 goto cleanup;
5101 }
5102
5103 /* Now insert the IPv6 fragment header */
5104 if (is_frag) {
5105 error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);
5106
5107 if (error != 0) {
5108 ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
5109 goto cleanup;
5110 }
5111 }
5112
5113 cleanup:
5114 if (ia6_clat_src != NULL) {
5115 IFA_REMREF(&ia6_clat_src->ia_ifa);
5116 }
5117
5118 if (pbuf_is_valid(pbuf)) {
5119 *m = pbuf->pb_mbuf;
5120 pbuf->pb_mbuf = NULL;
5121 pbuf_destroy(pbuf);
5122 } else {
5123 error = -1;
5124 ip6stat.ip6s_clat464_out_invalpbuf_drop++;
5125 }
5126
5127 if (error == 0) {
5128 *proto_family = PF_INET6;
5129 ip6stat.ip6s_clat464_out_success++;
5130 }
5131
5132 return error;
5133 }
5134
5135 /*
5136 * @brief This routine translates an incoming IPv6 packet to IPv4,
5137 * updates the protocol checksum and also translates the ICMPv6 outer
5138 * and inner headers.
5139 *
5140 * @return 0 on success or else a negative value.
5141 */
5142 static errno_t
5143 dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5144 {
5145 VERIFY(*proto_family == PF_INET6);
5146 VERIFY(IS_INTF_CLAT46(ifp));
5147
5148 struct ip6_hdr *ip6h = NULL;
5149 struct in6_addr osrc, odst;
5150 uint8_t proto = 0;
5151 struct in6_ifaddr *ia6_clat_dst = NULL;
5152 struct in_ifaddr *ia4_clat_dst = NULL;
5153 struct in_addr *dst = NULL;
5154 struct in_addr src;
5155 int error = 0;
5156 uint32_t off = 0;
5157 u_int64_t tot_len = 0;
5158 uint8_t tos = 0;
5159 boolean_t is_first_frag = TRUE;
5160
5161 /* Drop if the incoming mbuf does not contain a valid IPv6 header */
5162 if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
5163 ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
5164 (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
5165 ip6stat.ip6s_clat464_in_tooshort_drop++;
5166 return -1;
5167 }
5168
5169 ip6h = mtod(*m, struct ip6_hdr *);
5170 /* Validate that mbuf contains IP payload equal to ip6_plen */
5171 if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
5172 ip6stat.ip6s_clat464_in_tooshort_drop++;
5173 return -1;
5174 }
5175
5176 osrc = ip6h->ip6_src;
5177 odst = ip6h->ip6_dst;
5178
5179 /*
5180 * Retrieve the local CLAT46 reserved IPv6 address.
5181 * Let the packet pass if we don't find one, as the flag
5182 * may get set before IPv6 configuration has taken place.
5183 */
5184 ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5185 if (ia6_clat_dst == NULL) {
5186 goto done;
5187 }
5188
5189 /*
5190 * Check if the original dest in the packet is the same as the reserved
5191 * CLAT46 IPv6 address.
5192 */
5193 if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
5194 pbuf_t pbuf_store, *pbuf = NULL;
5195 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5196 pbuf = &pbuf_store;
5197
5198 /*
5199 * Retrieve the local CLAT46 IPv4 address reserved for stateless
5200 * translation.
5201 */
5202 ia4_clat_dst = inifa_ifpclatv4(ifp);
5203 if (ia4_clat_dst == NULL) {
5204 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5205 ip6stat.ip6s_clat464_in_nov4addr_drop++;
5206 error = -1;
5207 goto cleanup;
5208 }
5209 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5210
5211 /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
5212 dst = &ia4_clat_dst->ia_addr.sin_addr;
5213 if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
5214 ip6stat.ip6s_clat464_in_v4synthfail_drop++;
5215 error = -1;
5216 goto cleanup;
5217 }
5218
5219 ip6h = pbuf->pb_data;
5220 off = sizeof(struct ip6_hdr);
5221 proto = ip6h->ip6_nxt;
5222 tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
5223 tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);
5224
5225 /*
5226 * Translate the IP header and update the fragmentation
5227 * header if needed
5228 */
5229 error = (nat464_translate_64(pbuf, off, tos, &proto,
5230 ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
5231 0 : -1;
5232
5233 ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */
5234
5235 if (error != 0) {
5236 ip6stat.ip6s_clat464_in_64transfail_drop++;
5237 goto cleanup;
5238 }
5239
5240 /*
5241 * Translate protocol header, update checksum, checksum flags
5242 * and related fields.
5243 */
5244 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
5245 (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
5246 NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;
5247
5248 if (error != 0) {
5249 ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
5250 goto cleanup;
5251 }
5252
5253 cleanup:
5254 if (ia4_clat_dst != NULL) {
5255 IFA_REMREF(&ia4_clat_dst->ia_ifa);
5256 }
5257
5258 if (pbuf_is_valid(pbuf)) {
5259 *m = pbuf->pb_mbuf;
5260 pbuf->pb_mbuf = NULL;
5261 pbuf_destroy(pbuf);
5262 } else {
5263 error = -1;
5264 ip6stat.ip6s_clat464_in_invalpbuf_drop++;
5265 }
5266
5267 if (error == 0) {
5268 *proto_family = PF_INET;
5269 ip6stat.ip6s_clat464_in_success++;
5270 }
5271 } /* CLAT traffic */
5272
5273 done:
5274 return error;
5275 }
5276
5277 errno_t
5278 ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
5279 void *ioctl_arg)
5280 {
5281 struct ifnet_filter *filter;
5282 int retval = EOPNOTSUPP;
5283 int result = 0;
5284
5285 if (ifp == NULL || ioctl_code == 0) {
5286 return EINVAL;
5287 }
5288
5289 /* Get an io ref count if the interface is attached */
5290 if (!ifnet_is_attached(ifp, 1)) {
5291 return EOPNOTSUPP;
5292 }
5293
5294 /*
5295 * Run the interface filters first.
5296 * We want to run all filters before calling the protocol,
5297 * interface family, or interface.
5298 */
5299 lck_mtx_lock_spin(&ifp->if_flt_lock);
5300 /* prevent filter list from changing in case we drop the lock */
5301 if_flt_monitor_busy(ifp);
5302 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
5303 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
5304 filter->filt_protocol == proto_fam)) {
5305 lck_mtx_unlock(&ifp->if_flt_lock);
5306
5307 result = filter->filt_ioctl(filter->filt_cookie, ifp,
5308 proto_fam, ioctl_code, ioctl_arg);
5309
5310 lck_mtx_lock_spin(&ifp->if_flt_lock);
5311
5312 /* Only update retval if no one has handled the ioctl */
5313 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
5314 if (result == ENOTSUP) {
5315 result = EOPNOTSUPP;
5316 }
5317 retval = result;
5318 if (retval != 0 && retval != EOPNOTSUPP) {
5319 /* we're done with the filter list */
5320 if_flt_monitor_unbusy(ifp);
5321 lck_mtx_unlock(&ifp->if_flt_lock);
5322 goto cleanup;
5323 }
5324 }
5325 }
5326 }
5327 /* we're done with the filter list */
5328 if_flt_monitor_unbusy(ifp);
5329 lck_mtx_unlock(&ifp->if_flt_lock);
5330
5331 /* Allow the protocol to handle the ioctl */
5332 if (proto_fam != 0) {
5333 struct if_proto *proto;
5334
5335 /* callee holds a proto refcnt upon success */
5336 ifnet_lock_shared(ifp);
5337 proto = find_attached_proto(ifp, proto_fam);
5338 ifnet_lock_done(ifp);
5339 if (proto != NULL) {
5340 proto_media_ioctl ioctlp =
5341 (proto->proto_kpi == kProtoKPI_v1 ?
5342 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
5343 result = EOPNOTSUPP;
5344 if (ioctlp != NULL) {
5345 result = ioctlp(ifp, proto_fam, ioctl_code,
5346 ioctl_arg);
5347 }
5348 if_proto_free(proto);
5349
5350 /* Only update retval if no one has handled the ioctl */
5351 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
5352 if (result == ENOTSUP) {
5353 result = EOPNOTSUPP;
5354 }
5355 retval = result;
5356 if (retval && retval != EOPNOTSUPP) {
5357 goto cleanup;
5358 }
5359 }
5360 }
5361 }
5362
5363 /* retval is either 0 or EOPNOTSUPP */
5364
5365 /*
5366 * Let the interface handle this ioctl.
5367 * If it returns EOPNOTSUPP, ignore that, we may have
5368 * already handled this in the protocol or family.
5369 */
5370 if (ifp->if_ioctl) {
5371 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
5372 }
5373
5374 /* Only update retval if no one has handled the ioctl */
5375 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
5376 if (result == ENOTSUP) {
5377 result = EOPNOTSUPP;
5378 }
5379 retval = result;
5380 if (retval && retval != EOPNOTSUPP) {
5381 goto cleanup;
5382 }
5383 }
5384
5385 cleanup:
5386 if (retval == EJUSTRETURN) {
5387 retval = 0;
5388 }
5389
5390 ifnet_decr_iorefcnt(ifp);
5391
5392 return retval;
5393 }
5394
5395 __private_extern__ errno_t
5396 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
5397 {
5398 errno_t error = 0;
5399
5400
5401 if (ifp->if_set_bpf_tap) {
5402 /* Get an io reference on the interface if it is attached */
5403 if (!ifnet_is_attached(ifp, 1)) {
5404 return ENXIO;
5405 }
5406 error = ifp->if_set_bpf_tap(ifp, mode, callback);
5407 ifnet_decr_iorefcnt(ifp);
5408 }
5409 return error;
5410 }
5411
5412 errno_t
5413 dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
5414 struct sockaddr *ll_addr, size_t ll_len)
5415 {
5416 errno_t result = EOPNOTSUPP;
5417 struct if_proto *proto;
5418 const struct sockaddr *verify;
5419 proto_media_resolve_multi resolvep;
5420
5421 if (!ifnet_is_attached(ifp, 1)) {
5422 return result;
5423 }
5424
5425 bzero(ll_addr, ll_len);
5426
5427 /* Call the protocol first; callee holds a proto refcnt upon success */
5428 ifnet_lock_shared(ifp);
5429 proto = find_attached_proto(ifp, proto_addr->sa_family);
5430 ifnet_lock_done(ifp);
5431 if (proto != NULL) {
5432 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
5433 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
5434 if (resolvep != NULL) {
5435 result = resolvep(ifp, proto_addr,
5436 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
5437 }
5438 if_proto_free(proto);
5439 }
5440
5441 /* Let the interface verify the multicast address */
5442 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
5443 if (result == 0) {
5444 verify = ll_addr;
5445 } else {
5446 verify = proto_addr;
5447 }
5448 result = ifp->if_check_multi(ifp, verify);
5449 }
5450
5451 ifnet_decr_iorefcnt(ifp);
5452 return result;
5453 }
5454
5455 __private_extern__ errno_t
5456 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
5457 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
5458 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
5459 {
5460 struct if_proto *proto;
5461 errno_t result = 0;
5462
5463 /* callee holds a proto refcnt upon success */
5464 ifnet_lock_shared(ifp);
5465 proto = find_attached_proto(ifp, target_proto->sa_family);
5466 ifnet_lock_done(ifp);
5467 if (proto == NULL) {
5468 result = ENOTSUP;
5469 } else {
5470 proto_media_send_arp arpp;
5471 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
5472 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
5473 if (arpp == NULL) {
5474 result = ENOTSUP;
5475 } else {
5476 switch (arpop) {
5477 case ARPOP_REQUEST:
5478 arpstat.txrequests++;
5479 if (target_hw != NULL) {
5480 arpstat.txurequests++;
5481 }
5482 break;
5483 case ARPOP_REPLY:
5484 arpstat.txreplies++;
5485 break;
5486 }
5487 result = arpp(ifp, arpop, sender_hw, sender_proto,
5488 target_hw, target_proto);
5489 }
5490 if_proto_free(proto);
5491 }
5492
5493 return result;
5494 }
5495
5496 struct net_thread_marks { };
5497 static const struct net_thread_marks net_thread_marks_base = { };
5498
5499 __private_extern__ const net_thread_marks_t net_thread_marks_none =
5500 &net_thread_marks_base;
5501
5502 __private_extern__ net_thread_marks_t
5503 net_thread_marks_push(u_int32_t push)
5504 {
5505 static const char *const base = (const void*)&net_thread_marks_base;
5506 u_int32_t pop = 0;
5507
5508 if (push != 0) {
5509 struct uthread *uth = get_bsdthread_info(current_thread());
5510
5511 pop = push & ~uth->uu_network_marks;
5512 if (pop != 0) {
5513 uth->uu_network_marks |= pop;
5514 }
5515 }
5516
5517 return (net_thread_marks_t)&base[pop];
5518 }
5519
5520 __private_extern__ net_thread_marks_t
5521 net_thread_unmarks_push(u_int32_t unpush)
5522 {
5523 static const char *const base = (const void*)&net_thread_marks_base;
5524 u_int32_t unpop = 0;
5525
5526 if (unpush != 0) {
5527 struct uthread *uth = get_bsdthread_info(current_thread());
5528
5529 unpop = unpush & uth->uu_network_marks;
5530 if (unpop != 0) {
5531 uth->uu_network_marks &= ~unpop;
5532 }
5533 }
5534
5535 return (net_thread_marks_t)&base[unpop];
5536 }
5537
5538 __private_extern__ void
5539 net_thread_marks_pop(net_thread_marks_t popx)
5540 {
5541 static const char *const base = (const void*)&net_thread_marks_base;
5542 const ptrdiff_t pop = (const char *)popx - (const char *)base;
5543
5544 if (pop != 0) {
5545 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
5546 struct uthread *uth = get_bsdthread_info(current_thread());
5547
5548 VERIFY((pop & ones) == pop);
5549 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
5550 uth->uu_network_marks &= ~pop;
5551 }
5552 }
5553
5554 __private_extern__ void
5555 net_thread_unmarks_pop(net_thread_marks_t unpopx)
5556 {
5557 static const char *const base = (const void*)&net_thread_marks_base;
5558 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
5559
5560 if (unpop != 0) {
5561 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
5562 struct uthread *uth = get_bsdthread_info(current_thread());
5563
5564 VERIFY((unpop & ones) == unpop);
5565 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
5566 uth->uu_network_marks |= unpop;
5567 }
5568 }
5569
5570 __private_extern__ u_int32_t
5571 net_thread_is_marked(u_int32_t check)
5572 {
5573 if (check != 0) {
5574 struct uthread *uth = get_bsdthread_info(current_thread());
5575 return uth->uu_network_marks & check;
5576 } else {
5577 return 0;
5578 }
5579 }
5580
5581 __private_extern__ u_int32_t
5582 net_thread_is_unmarked(u_int32_t check)
5583 {
5584 if (check != 0) {
5585 struct uthread *uth = get_bsdthread_info(current_thread());
5586 return ~uth->uu_network_marks & check;
5587 } else {
5588 return 0;
5589 }
5590 }
5591
5592 static __inline__ int
5593 _is_announcement(const struct sockaddr_in * sender_sin,
5594 const struct sockaddr_in * target_sin)
5595 {
5596 if (sender_sin == NULL) {
5597 return FALSE;
5598 }
5599 return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
5600 }
5601
5602 __private_extern__ errno_t
5603 dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
5604 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
5605 const struct sockaddr *target_proto0, u_int32_t rtflags)
5606 {
5607 errno_t result = 0;
5608 const struct sockaddr_in * sender_sin;
5609 const struct sockaddr_in * target_sin;
5610 struct sockaddr_inarp target_proto_sinarp;
5611 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
5612
5613 if (target_proto == NULL || (sender_proto != NULL &&
5614 sender_proto->sa_family != target_proto->sa_family)) {
5615 return EINVAL;
5616 }
5617
5618 /*
5619 * If the target is a (default) router, provide that
5620 * information to the send_arp callback routine.
5621 */
5622 if (rtflags & RTF_ROUTER) {
5623 bcopy(target_proto, &target_proto_sinarp,
5624 sizeof(struct sockaddr_in));
5625 target_proto_sinarp.sin_other |= SIN_ROUTER;
5626 target_proto = (struct sockaddr *)&target_proto_sinarp;
5627 }
5628
5629 /*
5630 * If this is an ARP request and the target IP is IPv4LL,
5631 * send the request on all interfaces. The exception is
5632 * an announcement, which must only appear on the specific
5633 * interface.
5634 */
5635 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
5636 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
5637 if (target_proto->sa_family == AF_INET &&
5638 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
5639 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
5640 !_is_announcement(target_sin, sender_sin)) {
5641 ifnet_t *ifp_list;
5642 u_int32_t count;
5643 u_int32_t ifp_on;
5644
5645 result = ENOTSUP;
5646
5647 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
5648 for (ifp_on = 0; ifp_on < count; ifp_on++) {
5649 errno_t new_result;
5650 ifaddr_t source_hw = NULL;
5651 ifaddr_t source_ip = NULL;
5652 struct sockaddr_in source_ip_copy;
5653 struct ifnet *cur_ifp = ifp_list[ifp_on];
5654
5655 /*
5656 * Only arp on interfaces marked for IPv4LL
5657 * ARPing. This may mean that we don't ARP on
5658 * the interface the subnet route points to.
5659 */
5660 if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
5661 continue;
5662 }
5663
5664 /* Find the source IP address */
5665 ifnet_lock_shared(cur_ifp);
5666 source_hw = cur_ifp->if_lladdr;
5667 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
5668 ifa_link) {
5669 IFA_LOCK(source_ip);
5670 if (source_ip->ifa_addr != NULL &&
5671 source_ip->ifa_addr->sa_family ==
5672 AF_INET) {
5673 /* Copy the source IP address */
5674 source_ip_copy =
5675 *(struct sockaddr_in *)
5676 (void *)source_ip->ifa_addr;
5677 IFA_UNLOCK(source_ip);
5678 break;
5679 }
5680 IFA_UNLOCK(source_ip);
5681 }
5682
5683 /* No IP Source, don't arp */
5684 if (source_ip == NULL) {
5685 ifnet_lock_done(cur_ifp);
5686 continue;
5687 }
5688
5689 IFA_ADDREF(source_hw);
5690 ifnet_lock_done(cur_ifp);
5691
5692 /* Send the ARP */
5693 new_result = dlil_send_arp_internal(cur_ifp,
5694 arpop, (struct sockaddr_dl *)(void *)
5695 source_hw->ifa_addr,
5696 (struct sockaddr *)&source_ip_copy, NULL,
5697 target_proto);
5698
5699 IFA_REMREF(source_hw);
5700 if (result == ENOTSUP) {
5701 result = new_result;
5702 }
5703 }
5704 ifnet_list_free(ifp_list);
5705 }
5706 } else {
5707 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
5708 sender_proto, target_hw, target_proto);
5709 }
5710
5711 return result;
5712 }
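/*
 * Editor's note -- illustrative, not part of the build: the IPv4LL fan-out
 * above uses the ifnet_list_get()/ifnet_list_free() KPI, which is the
 * general pattern for walking the interface list without holding
 * ifnet_head_lock across the per-interface work:
 *
 *	ifnet_t *list;
 *	u_int32_t i, count;
 *
 *	if (ifnet_list_get(IFNET_FAMILY_ANY, &list, &count) == 0) {
 *		for (i = 0; i < count; i++) {
 *			... inspect or use list[i] ...
 *		}
 *		ifnet_list_free(list);
 *	}
 */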
5713
5714 /*
5715 * Caller must hold ifnet head lock.
5716 */
5717 static int
5718 ifnet_lookup(struct ifnet *ifp)
5719 {
5720 struct ifnet *_ifp;
5721
5722 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
5723 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
5724 if (_ifp == ifp) {
5725 break;
5726 }
5727 }
5728 return _ifp != NULL;
5729 }
5730
5731 /*
5732 * Caller has to pass a non-zero refio argument to get an
5733 * IO reference count. This will prevent ifnet_detach from
5734 * being called when there are outstanding io reference counts.
5735 */
5736 int
5737 ifnet_is_attached(struct ifnet *ifp, int refio)
5738 {
5739 int ret;
5740
5741 lck_mtx_lock_spin(&ifp->if_ref_lock);
5742 if ((ret = IF_FULLY_ATTACHED(ifp))) {
5743 if (refio > 0) {
5744 ifp->if_refio++;
5745 }
5746 }
5747 lck_mtx_unlock(&ifp->if_ref_lock);
5748
5749 return ret;
5750 }
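/*
 * Editor's note -- illustrative, not part of the build: the usual pattern
 * for code paths that dereference an ifnet outside of ifnet_head_lock is
 * to take an IO reference for the duration of the operation, which keeps
 * ifnet_detach_final() from completing underneath the caller:
 *
 *	if (!ifnet_is_attached(ifp, 1))
 *		return (ENXIO);
 *	... safely use ifp ...
 *	ifnet_decr_iorefcnt(ifp);
 *
 * This mirrors the usage in dlil_set_bpf_tap() and dlil_resolve_multi()
 * above.
 */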
5751
5752 /*
5753 * Caller must ensure the interface is attached; the assumption is that
5754 * there is at least an outstanding IO reference count held already.
5755 * Most callers would call ifnet_is_attached() instead.
5756 */
5757 void
5758 ifnet_incr_iorefcnt(struct ifnet *ifp)
5759 {
5760 lck_mtx_lock_spin(&ifp->if_ref_lock);
5761 VERIFY(IF_FULLY_ATTACHED(ifp));
5762 VERIFY(ifp->if_refio > 0);
5763 ifp->if_refio++;
5764 lck_mtx_unlock(&ifp->if_ref_lock);
5765 }
5766
5767 void
5768 ifnet_decr_iorefcnt(struct ifnet *ifp)
5769 {
5770 lck_mtx_lock_spin(&ifp->if_ref_lock);
5771 VERIFY(ifp->if_refio > 0);
5772 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
5773 ifp->if_refio--;
5774
5775 /*
5776 * If there are no more outstanding IO references, wake up the
5777 * ifnet_detach thread if the detaching flag is set.
5778 */
5779 if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
5780 wakeup(&(ifp->if_refio));
5781 }
5782
5783 lck_mtx_unlock(&ifp->if_ref_lock);
5784 }
5785
5786 static void
5787 dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
5788 {
5789 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
5790 ctrace_t *tr;
5791 u_int32_t idx;
5792 u_int16_t *cnt;
5793
5794 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
5795 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
5796 /* NOTREACHED */
5797 }
5798
5799 if (refhold) {
5800 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
5801 tr = dl_if_dbg->dldbg_if_refhold;
5802 } else {
5803 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
5804 tr = dl_if_dbg->dldbg_if_refrele;
5805 }
5806
5807 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
5808 ctrace_record(&tr[idx]);
5809 }
5810
5811 errno_t
5812 dlil_if_ref(struct ifnet *ifp)
5813 {
5814 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5815
5816 if (dl_if == NULL) {
5817 return EINVAL;
5818 }
5819
5820 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5821 ++dl_if->dl_if_refcnt;
5822 if (dl_if->dl_if_refcnt == 0) {
5823 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
5824 /* NOTREACHED */
5825 }
5826 if (dl_if->dl_if_trace != NULL) {
5827 (*dl_if->dl_if_trace)(dl_if, TRUE);
5828 }
5829 lck_mtx_unlock(&dl_if->dl_if_lock);
5830
5831 return 0;
5832 }
5833
5834 errno_t
5835 dlil_if_free(struct ifnet *ifp)
5836 {
5837 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5838 bool need_release = FALSE;
5839
5840 if (dl_if == NULL) {
5841 return EINVAL;
5842 }
5843
5844 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5845 switch (dl_if->dl_if_refcnt) {
5846 case 0:
5847 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
5848 /* NOTREACHED */
5849 break;
5850 case 1:
5851 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
5852 need_release = TRUE;
5853 }
5854 break;
5855 default:
5856 break;
5857 }
5858 --dl_if->dl_if_refcnt;
5859 if (dl_if->dl_if_trace != NULL) {
5860 (*dl_if->dl_if_trace)(dl_if, FALSE);
5861 }
5862 lck_mtx_unlock(&dl_if->dl_if_lock);
5863 if (need_release) {
5864 dlil_if_release(ifp);
5865 }
5866 return 0;
5867 }
5868
5869 static errno_t
5870 dlil_attach_protocol_internal(struct if_proto *proto,
5871 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
5872 uint32_t * proto_count)
5873 {
5874 struct kev_dl_proto_data ev_pr_data;
5875 struct ifnet *ifp = proto->ifp;
5876 int retval = 0;
5877 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
5878 struct if_proto *prev_proto;
5879 struct if_proto *_proto;
5880
5881 /* callee holds a proto refcnt upon success */
5882 ifnet_lock_exclusive(ifp);
5883 _proto = find_attached_proto(ifp, proto->protocol_family);
5884 if (_proto != NULL) {
5885 ifnet_lock_done(ifp);
5886 if_proto_free(_proto);
5887 return EEXIST;
5888 }
5889
5890 /*
5891 * Call family module add_proto routine so it can refine the
5892 * demux descriptors as it wishes.
5893 */
5894 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
5895 demux_count);
5896 if (retval) {
5897 ifnet_lock_done(ifp);
5898 return retval;
5899 }
5900
5901 /*
5902 * Insert the protocol in the hash
5903 */
5904 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
5905 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
5906 prev_proto = SLIST_NEXT(prev_proto, next_hash);
5907 }
5908 if (prev_proto) {
5909 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
5910 } else {
5911 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
5912 proto, next_hash);
5913 }
5914
5915 /* hold a proto refcnt for attach */
5916 if_proto_ref(proto);
5917
5918 /*
5919 * The reserved field carries the number of protocols still attached
5920 * (subject to change)
5921 */
5922 ev_pr_data.proto_family = proto->protocol_family;
5923 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
5924
5925 ifnet_lock_done(ifp);
5926
5927 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
5928 (struct net_event_data *)&ev_pr_data,
5929 sizeof(struct kev_dl_proto_data));
5930 if (proto_count != NULL) {
5931 *proto_count = ev_pr_data.proto_remaining_count;
5932 }
5933 return retval;
5934 }
5935
5936 errno_t
5937 ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
5938 const struct ifnet_attach_proto_param *proto_details)
5939 {
5940 int retval = 0;
5941 struct if_proto *ifproto = NULL;
5942 uint32_t proto_count = 0;
5943
5944 ifnet_head_lock_shared();
5945 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5946 retval = EINVAL;
5947 goto end;
5948 }
5949 /* Check that the interface is in the global list */
5950 if (!ifnet_lookup(ifp)) {
5951 retval = ENXIO;
5952 goto end;
5953 }
5954
5955 ifproto = zalloc(dlif_proto_zone);
5956 if (ifproto == NULL) {
5957 retval = ENOMEM;
5958 goto end;
5959 }
5960 bzero(ifproto, dlif_proto_size);
5961
5962 /* refcnt held above during lookup */
5963 ifproto->ifp = ifp;
5964 ifproto->protocol_family = protocol;
5965 ifproto->proto_kpi = kProtoKPI_v1;
5966 ifproto->kpi.v1.input = proto_details->input;
5967 ifproto->kpi.v1.pre_output = proto_details->pre_output;
5968 ifproto->kpi.v1.event = proto_details->event;
5969 ifproto->kpi.v1.ioctl = proto_details->ioctl;
5970 ifproto->kpi.v1.detached = proto_details->detached;
5971 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
5972 ifproto->kpi.v1.send_arp = proto_details->send_arp;
5973
5974 retval = dlil_attach_protocol_internal(ifproto,
5975 proto_details->demux_list, proto_details->demux_count,
5976 &proto_count);
5977
5978 end:
5979 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5980 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
5981 if_name(ifp), protocol, retval);
5982 } else {
5983 if (dlil_verbose) {
5984 printf("%s: attached v1 protocol %d (count = %d)\n",
5985 if_name(ifp),
5986 protocol, proto_count);
5987 }
5988 }
5989 ifnet_head_done();
5990 if (retval == 0) {
5991 /*
5992 * A protocol has been attached, mark the interface up.
5993 * This used to be done by configd.KernelEventMonitor, but that
5994 * is inherently prone to races (rdar://problem/30810208).
5995 */
5996 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
5997 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
5998 dlil_post_sifflags_msg(ifp);
5999 } else if (ifproto != NULL) {
6000 zfree(dlif_proto_zone, ifproto);
6001 }
6002 return retval;
6003 }
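/*
 * Editor's note -- illustrative sketch, not part of the build: a protocol
 * family module would typically attach itself along the following lines;
 * the "my_" names, PF_MYPROTO and my_ethertype are hypothetical
 * placeholders, while the parameter fields match those consumed above.
 *
 *	struct ifnet_attach_proto_param param;
 *	struct ifnet_demux_desc desc;
 *	errno_t err;
 *
 *	bzero(&desc, sizeof (desc));
 *	desc.type = DLIL_DESC_ETYPE2;
 *	desc.data = &my_ethertype;	(hypothetical, network byte order)
 *	desc.datalen = sizeof (my_ethertype);
 *
 *	bzero(&param, sizeof (param));
 *	param.input = my_proto_input;
 *	param.pre_output = my_proto_pre_output;
 *	param.ioctl = my_proto_ioctl;
 *	param.demux_list = &desc;
 *	param.demux_count = 1;
 *
 *	err = ifnet_attach_protocol(ifp, PF_MYPROTO, &param);
 *
 * On success the interface is also marked IFF_UP (see above).
 */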
6004
6005 errno_t
6006 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
6007 const struct ifnet_attach_proto_param_v2 *proto_details)
6008 {
6009 int retval = 0;
6010 struct if_proto *ifproto = NULL;
6011 uint32_t proto_count = 0;
6012
6013 ifnet_head_lock_shared();
6014 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
6015 retval = EINVAL;
6016 goto end;
6017 }
6018 /* Check that the interface is in the global list */
6019 if (!ifnet_lookup(ifp)) {
6020 retval = ENXIO;
6021 goto end;
6022 }
6023
6024 ifproto = zalloc(dlif_proto_zone);
6025 if (ifproto == NULL) {
6026 retval = ENOMEM;
6027 goto end;
6028 }
6029 bzero(ifproto, sizeof(*ifproto));
6030
6031 /* refcnt held above during lookup */
6032 ifproto->ifp = ifp;
6033 ifproto->protocol_family = protocol;
6034 ifproto->proto_kpi = kProtoKPI_v2;
6035 ifproto->kpi.v2.input = proto_details->input;
6036 ifproto->kpi.v2.pre_output = proto_details->pre_output;
6037 ifproto->kpi.v2.event = proto_details->event;
6038 ifproto->kpi.v2.ioctl = proto_details->ioctl;
6039 ifproto->kpi.v2.detached = proto_details->detached;
6040 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
6041 ifproto->kpi.v2.send_arp = proto_details->send_arp;
6042
6043 retval = dlil_attach_protocol_internal(ifproto,
6044 proto_details->demux_list, proto_details->demux_count,
6045 &proto_count);
6046
6047 end:
6048 if (retval != 0 && retval != EEXIST && ifp != NULL) {
6049 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
6050 if_name(ifp), protocol, retval);
6051 } else {
6052 if (dlil_verbose) {
6053 printf("%s: attached v2 protocol %d (count = %d)\n",
6054 if_name(ifp),
6055 protocol, proto_count);
6056 }
6057 }
6058 ifnet_head_done();
6059 if (retval == 0) {
6060 /*
6061 * A protocol has been attached, mark the interface up.
6062 * This used to be done by configd.KernelEventMonitor, but that
6063 * is inherently prone to races (rdar://problem/30810208).
6064 */
6065 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
6066 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
6067 dlil_post_sifflags_msg(ifp);
6068 } else if (ifproto != NULL) {
6069 zfree(dlif_proto_zone, ifproto);
6070 }
6071 return retval;
6072 }
6073
6074 errno_t
6075 ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
6076 {
6077 struct if_proto *proto = NULL;
6078 int retval = 0;
6079
6080 if (ifp == NULL || proto_family == 0) {
6081 retval = EINVAL;
6082 goto end;
6083 }
6084
6085 ifnet_lock_exclusive(ifp);
6086 /* callee holds a proto refcnt upon success */
6087 proto = find_attached_proto(ifp, proto_family);
6088 if (proto == NULL) {
6089 retval = ENXIO;
6090 ifnet_lock_done(ifp);
6091 goto end;
6092 }
6093
6094 /* call family module del_proto */
6095 if (ifp->if_del_proto) {
6096 ifp->if_del_proto(ifp, proto->protocol_family);
6097 }
6098
6099 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
6100 proto, if_proto, next_hash);
6101
6102 if (proto->proto_kpi == kProtoKPI_v1) {
6103 proto->kpi.v1.input = ifproto_media_input_v1;
6104 proto->kpi.v1.pre_output = ifproto_media_preout;
6105 proto->kpi.v1.event = ifproto_media_event;
6106 proto->kpi.v1.ioctl = ifproto_media_ioctl;
6107 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
6108 proto->kpi.v1.send_arp = ifproto_media_send_arp;
6109 } else {
6110 proto->kpi.v2.input = ifproto_media_input_v2;
6111 proto->kpi.v2.pre_output = ifproto_media_preout;
6112 proto->kpi.v2.event = ifproto_media_event;
6113 proto->kpi.v2.ioctl = ifproto_media_ioctl;
6114 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
6115 proto->kpi.v2.send_arp = ifproto_media_send_arp;
6116 }
6117 proto->detached = 1;
6118 ifnet_lock_done(ifp);
6119
6120 if (dlil_verbose) {
6121 printf("%s: detached %s protocol %d\n", if_name(ifp),
6122 (proto->proto_kpi == kProtoKPI_v1) ?
6123 "v1" : "v2", proto_family);
6124 }
6125
6126 /* release proto refcnt held during protocol attach */
6127 if_proto_free(proto);
6128
6129 /*
6130 * Release proto refcnt held during lookup; the rest of
6131 * protocol detach steps will happen when the last proto
6132 * reference is released.
6133 */
6134 if_proto_free(proto);
6135
6136 end:
6137 return retval;
6138 }
6139
6140
6141 static errno_t
6142 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
6143 struct mbuf *packet, char *header)
6144 {
6145 #pragma unused(ifp, protocol, packet, header)
6146 return ENXIO;
6147 }
6148
6149 static errno_t
6150 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
6151 struct mbuf *packet)
6152 {
6153 #pragma unused(ifp, protocol, packet)
6154 return ENXIO;
6155 }
6156
6157 static errno_t
6158 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
6159 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
6160 char *link_layer_dest)
6161 {
6162 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
6163 return ENXIO;
6164 }
6165
6166 static void
6167 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
6168 const struct kev_msg *event)
6169 {
6170 #pragma unused(ifp, protocol, event)
6171 }
6172
6173 static errno_t
6174 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
6175 unsigned long command, void *argument)
6176 {
6177 #pragma unused(ifp, protocol, command, argument)
6178 return ENXIO;
6179 }
6180
6181 static errno_t
6182 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
6183 struct sockaddr_dl *out_ll, size_t ll_len)
6184 {
6185 #pragma unused(ifp, proto_addr, out_ll, ll_len)
6186 return ENXIO;
6187 }
6188
6189 static errno_t
6190 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
6191 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
6192 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
6193 {
6194 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
6195 return ENXIO;
6196 }
6197
6198 extern int if_next_index(void);
6199 extern int tcp_ecn_outbound;
6200
6201 errno_t
6202 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
6203 {
6204 struct ifnet *tmp_if;
6205 struct ifaddr *ifa;
6206 struct if_data_internal if_data_saved;
6207 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6208 struct dlil_threading_info *dl_inp;
6209 u_int32_t sflags = 0;
6210 int err;
6211
6212 if (ifp == NULL) {
6213 return EINVAL;
6214 }
6215
6216 /*
6217 * Serialize ifnet attach using dlil_ifnet_lock, in order to
6218 * prevent the interface from being configured while it is
6219 * embryonic, as ifnet_head_lock is dropped and reacquired
6220 * below prior to marking the ifnet with IFRF_ATTACHED.
6221 */
6222 dlil_if_lock();
6223 ifnet_head_lock_exclusive();
6224 /* Verify we aren't already on the list */
6225 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
6226 if (tmp_if == ifp) {
6227 ifnet_head_done();
6228 dlil_if_unlock();
6229 return EEXIST;
6230 }
6231 }
6232
6233 lck_mtx_lock_spin(&ifp->if_ref_lock);
6234 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
6235 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
6236 __func__, ifp);
6237 /* NOTREACHED */
6238 }
6239 lck_mtx_unlock(&ifp->if_ref_lock);
6240
6241 ifnet_lock_exclusive(ifp);
6242
6243 /* Sanity check */
6244 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
6245 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
6246
6247 if (ll_addr != NULL) {
6248 if (ifp->if_addrlen == 0) {
6249 ifp->if_addrlen = ll_addr->sdl_alen;
6250 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
6251 ifnet_lock_done(ifp);
6252 ifnet_head_done();
6253 dlil_if_unlock();
6254 return EINVAL;
6255 }
6256 }
6257
6258 /*
6259 * Allow interfaces without protocol families to attach
6260 * only if they have the necessary fields filled out.
6261 */
6262 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
6263 DLIL_PRINTF("%s: Attempt to attach interface without "
6264 "family module - %d\n", __func__, ifp->if_family);
6265 ifnet_lock_done(ifp);
6266 ifnet_head_done();
6267 dlil_if_unlock();
6268 return ENODEV;
6269 }
6270
6271 /* Allocate protocol hash table */
6272 VERIFY(ifp->if_proto_hash == NULL);
6273 ifp->if_proto_hash = zalloc(dlif_phash_zone);
6274 if (ifp->if_proto_hash == NULL) {
6275 ifnet_lock_done(ifp);
6276 ifnet_head_done();
6277 dlil_if_unlock();
6278 return ENOBUFS;
6279 }
6280 bzero(ifp->if_proto_hash, dlif_phash_size);
6281
6282 lck_mtx_lock_spin(&ifp->if_flt_lock);
6283 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
6284 TAILQ_INIT(&ifp->if_flt_head);
6285 VERIFY(ifp->if_flt_busy == 0);
6286 VERIFY(ifp->if_flt_waiters == 0);
6287 lck_mtx_unlock(&ifp->if_flt_lock);
6288
6289 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
6290 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
6291 LIST_INIT(&ifp->if_multiaddrs);
6292 }
6293
6294 VERIFY(ifp->if_allhostsinm == NULL);
6295 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
6296 TAILQ_INIT(&ifp->if_addrhead);
6297
6298 if (ifp->if_index == 0) {
6299 int idx = if_next_index();
6300
6301 if (idx == -1) {
6302 ifp->if_index = 0;
6303 ifnet_lock_done(ifp);
6304 ifnet_head_done();
6305 dlil_if_unlock();
6306 return ENOBUFS;
6307 }
6308 ifp->if_index = idx;
6309 }
6310 /* There should not be anything occupying this slot */
6311 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
6312
6313 /* allocate (if needed) and initialize a link address */
6314 ifa = dlil_alloc_lladdr(ifp, ll_addr);
6315 if (ifa == NULL) {
6316 ifnet_lock_done(ifp);
6317 ifnet_head_done();
6318 dlil_if_unlock();
6319 return ENOBUFS;
6320 }
6321
6322 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
6323 ifnet_addrs[ifp->if_index - 1] = ifa;
6324
6325 /* make this address the first on the list */
6326 IFA_LOCK(ifa);
6327 /* hold a reference for ifnet_addrs[] */
6328 IFA_ADDREF_LOCKED(ifa);
6329 /* if_attach_link_ifa() holds a reference for ifa_link */
6330 if_attach_link_ifa(ifp, ifa);
6331 IFA_UNLOCK(ifa);
6332
6333 #if CONFIG_MACF_NET
6334 mac_ifnet_label_associate(ifp);
6335 #endif
6336
6337 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
6338 ifindex2ifnet[ifp->if_index] = ifp;
6339
6340 /* Hold a reference to the underlying dlil_ifnet */
6341 ifnet_reference(ifp);
6342
6343 /* Clear stats (save and restore other fields that we care about) */
6344 if_data_saved = ifp->if_data;
6345 bzero(&ifp->if_data, sizeof(ifp->if_data));
6346 ifp->if_data.ifi_type = if_data_saved.ifi_type;
6347 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
6348 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
6349 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
6350 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
6351 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
6352 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
6353 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
6354 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
6355 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
6356 ifnet_touch_lastchange(ifp);
6357
6358 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
6359 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
6360 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
6361
6362 /* By default, use SFB and enable flow advisory */
6363 sflags = PKTSCHEDF_QALG_SFB;
6364 if (if_flowadv) {
6365 sflags |= PKTSCHEDF_QALG_FLOWCTL;
6366 }
6367
6368 if (if_delaybased_queue) {
6369 sflags |= PKTSCHEDF_QALG_DELAYBASED;
6370 }
6371
6372 if (ifp->if_output_sched_model ==
6373 IFNET_SCHED_MODEL_DRIVER_MANAGED) {
6374 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
6375 }
6376
6377 /* Initialize transmit queue(s) */
6378 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
6379 if (err != 0) {
6380 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
6381 "err=%d", __func__, ifp, err);
6382 /* NOTREACHED */
6383 }
6384
6385 /* Sanity checks on the input thread storage */
6386 dl_inp = &dl_if->dl_if_inpstorage;
6387 bzero(&dl_inp->stats, sizeof(dl_inp->stats));
6388 VERIFY(dl_inp->input_waiting == 0);
6389 VERIFY(dl_inp->wtot == 0);
6390 VERIFY(dl_inp->ifp == NULL);
6391 VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
6392 VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
6393 VERIFY(!dl_inp->net_affinity);
6394 VERIFY(ifp->if_inp == NULL);
6395 VERIFY(dl_inp->input_thr == THREAD_NULL);
6396 VERIFY(dl_inp->wloop_thr == THREAD_NULL);
6397 VERIFY(dl_inp->poll_thr == THREAD_NULL);
6398 VERIFY(dl_inp->tag == 0);
6399 VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
6400 bzero(&dl_inp->tstats, sizeof(dl_inp->tstats));
6401 bzero(&dl_inp->pstats, sizeof(dl_inp->pstats));
6402 bzero(&dl_inp->sstats, sizeof(dl_inp->sstats));
6403 #if IFNET_INPUT_SANITY_CHK
6404 VERIFY(dl_inp->input_mbuf_cnt == 0);
6405 #endif /* IFNET_INPUT_SANITY_CHK */
6406
6407 /*
6408 * A specific DLIL input thread is created per Ethernet/cellular
6409 * interface or for an interface which supports opportunistic
6410 * input polling. Pseudo interfaces or other types of interfaces
6411 * use the main input thread instead.
6412 */
6413 if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
6414 ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
6415 ifp->if_inp = dl_inp;
6416 err = dlil_create_input_thread(ifp, ifp->if_inp);
6417 if (err != 0) {
6418 panic_plain("%s: ifp=%p couldn't get an input thread; "
6419 "err=%d", __func__, ifp, err);
6420 /* NOTREACHED */
6421 }
6422 }
6423
6424 if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
6425 ifp->if_inp->input_mit_tcall =
6426 thread_call_allocate_with_priority(dlil_mit_tcall_fn,
6427 ifp, THREAD_CALL_PRIORITY_KERNEL);
6428 }
6429
6430 /*
6431 * If the driver supports the new transmit model, calculate flow hash
6432 * and create a workloop starter thread to invoke the if_start callback
6433 * where the packets may be dequeued and transmitted.
6434 */
6435 if (ifp->if_eflags & IFEF_TXSTART) {
6436 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
6437 VERIFY(ifp->if_flowhash != 0);
6438 VERIFY(ifp->if_start_thread == THREAD_NULL);
6439
6440 ifnet_set_start_cycle(ifp, NULL);
6441 ifp->if_start_active = 0;
6442 ifp->if_start_req = 0;
6443 ifp->if_start_flags = 0;
6444 VERIFY(ifp->if_start != NULL);
6445 if ((err = kernel_thread_start(ifnet_start_thread_fn,
6446 ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
6447 panic_plain("%s: "
6448 "ifp=%p couldn't get a start thread; "
6449 "err=%d", __func__, ifp, err);
6450 /* NOTREACHED */
6451 }
6452 ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
6453 (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
6454 } else {
6455 ifp->if_flowhash = 0;
6456 }
6457
6458 /*
6459 * If the driver supports the new receive model, create a poller
6460 * thread to invoke if_input_poll callback where the packets may
6461 * be dequeued from the driver and processed for reception.
6462 */
6463 if (ifp->if_eflags & IFEF_RXPOLL) {
6464 VERIFY(ifp->if_input_poll != NULL);
6465 VERIFY(ifp->if_input_ctl != NULL);
6466 VERIFY(ifp->if_poll_thread == THREAD_NULL);
6467
6468 ifnet_set_poll_cycle(ifp, NULL);
6469 ifp->if_poll_update = 0;
6470 ifp->if_poll_active = 0;
6471 ifp->if_poll_req = 0;
6472 if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
6473 &ifp->if_poll_thread)) != KERN_SUCCESS) {
6474 panic_plain("%s: ifp=%p couldn't get a poll thread; "
6475 "err=%d", __func__, ifp, err);
6476 /* NOTREACHED */
6477 }
6478 ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
6479 (MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP));
6480 }
6481
6482 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
6483 VERIFY(ifp->if_desc.ifd_len == 0);
6484 VERIFY(ifp->if_desc.ifd_desc != NULL);
6485
6486 /* Record attach PC stacktrace */
6487 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
6488
6489 ifp->if_updatemcasts = 0;
6490 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
6491 struct ifmultiaddr *ifma;
6492 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
6493 IFMA_LOCK(ifma);
6494 if (ifma->ifma_addr->sa_family == AF_LINK ||
6495 ifma->ifma_addr->sa_family == AF_UNSPEC) {
6496 ifp->if_updatemcasts++;
6497 }
6498 IFMA_UNLOCK(ifma);
6499 }
6500
6501 printf("%s: attached with %d suspended link-layer multicast "
6502 "membership(s)\n", if_name(ifp),
6503 ifp->if_updatemcasts);
6504 }
6505
6506 /* Clear logging parameters */
6507 bzero(&ifp->if_log, sizeof(ifp->if_log));
6508
6509 /* Clear foreground/realtime activity timestamps */
6510 ifp->if_fg_sendts = 0;
6511 ifp->if_rt_sendts = 0;
6512
6513 VERIFY(ifp->if_delegated.ifp == NULL);
6514 VERIFY(ifp->if_delegated.type == 0);
6515 VERIFY(ifp->if_delegated.family == 0);
6516 VERIFY(ifp->if_delegated.subfamily == 0);
6517 VERIFY(ifp->if_delegated.expensive == 0);
6518
6519 VERIFY(ifp->if_agentids == NULL);
6520 VERIFY(ifp->if_agentcount == 0);
6521
6522 /* Reset interface state */
6523 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
6524 ifp->if_interface_state.valid_bitmask |=
6525 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6526 ifp->if_interface_state.interface_availability =
6527 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
6528
6529 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
6530 if (ifp == lo_ifp) {
6531 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
6532 ifp->if_interface_state.valid_bitmask |=
6533 IF_INTERFACE_STATE_LQM_STATE_VALID;
6534 } else {
6535 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
6536 }
6537
6538 /*
6539 * Enable ECN capability on this interface depending on the
6540 * value of ECN global setting
6541 */
6542 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
6543 ifp->if_eflags |= IFEF_ECN_ENABLE;
6544 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
6545 }
6546
6547 /*
6548 * Built-in Cyclops always-on policy for WiFi infra
6549 */
6550 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
6551 errno_t error;
6552
6553 error = if_set_qosmarking_mode(ifp,
6554 IFRTYPE_QOSMARKING_FASTLANE);
6555 if (error != 0) {
6556 printf("%s if_set_qosmarking_mode(%s) error %d\n",
6557 __func__, ifp->if_xname, error);
6558 } else {
6559 ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
6560 #if (DEVELOPMENT || DEBUG)
6561 printf("%s fastlane enabled on %s\n",
6562 __func__, ifp->if_xname);
6563 #endif /* (DEVELOPMENT || DEBUG) */
6564 }
6565 }
6566
6567 ifnet_lock_done(ifp);
6568 ifnet_head_done();
6569
6570
6571 lck_mtx_lock(&ifp->if_cached_route_lock);
6572 /* Enable forwarding cached route */
6573 ifp->if_fwd_cacheok = 1;
6574 /* Clean up any existing cached routes */
6575 ROUTE_RELEASE(&ifp->if_fwd_route);
6576 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
6577 ROUTE_RELEASE(&ifp->if_src_route);
6578 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
6579 ROUTE_RELEASE(&ifp->if_src_route6);
6580 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
6581 lck_mtx_unlock(&ifp->if_cached_route_lock);
6582
6583 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
6584
6585 /*
6586 * Allocate and attach IGMPv3/MLDv2 interface-specific variables
6587 * and trees; do this before the ifnet is marked as attached.
6588 * The ifnet keeps the reference to the info structures even after
6589 * the ifnet is detached, since the network-layer records still
6590 * refer to the info structures even after that. This also
6591 * makes it possible for them to still function after the ifnet
6592 * is recycled or reattached.
6593 */
6594 #if INET
6595 if (IGMP_IFINFO(ifp) == NULL) {
6596 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
6597 VERIFY(IGMP_IFINFO(ifp) != NULL);
6598 } else {
6599 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
6600 igmp_domifreattach(IGMP_IFINFO(ifp));
6601 }
6602 #endif /* INET */
6603 #if INET6
6604 if (MLD_IFINFO(ifp) == NULL) {
6605 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
6606 VERIFY(MLD_IFINFO(ifp) != NULL);
6607 } else {
6608 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
6609 mld_domifreattach(MLD_IFINFO(ifp));
6610 }
6611 #endif /* INET6 */
6612
6613 VERIFY(ifp->if_data_threshold == 0);
6614 VERIFY(ifp->if_dt_tcall != NULL);
6615
6616 /*
6617 * Finally, mark this ifnet as attached.
6618 */
6619 lck_mtx_lock(rnh_lock);
6620 ifnet_lock_exclusive(ifp);
6621 lck_mtx_lock_spin(&ifp->if_ref_lock);
6622 ifp->if_refflags = IFRF_ATTACHED; /* clears embryonic */
6623 lck_mtx_unlock(&ifp->if_ref_lock);
6624 if (net_rtref) {
6625 /* boot-args override; enable idle notification */
6626 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
6627 IFRF_IDLE_NOTIFY);
6628 } else {
6629 /* apply previous request(s) to set the idle flags, if any */
6630 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
6631 ifp->if_idle_new_flags_mask);
6632 }
6633 ifnet_lock_done(ifp);
6634 lck_mtx_unlock(rnh_lock);
6635 dlil_if_unlock();
6636
6637 #if PF
6638 /*
6639 * Attach packet filter to this interface, if enabled.
6640 */
6641 pf_ifnet_hook(ifp, 1);
6642 #endif /* PF */
6643
6644 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
6645
6646 if (dlil_verbose) {
6647 printf("%s: attached%s\n", if_name(ifp),
6648 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
6649 }
6650
6651 return 0;
6652 }
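/*
 * Editor's note -- illustrative sketch, not part of the build: a network
 * driver normally reaches ifnet_attach() via the ifnet_allocate() KPI.
 * The "my_" names are hypothetical; the fields shown are the minimum this
 * routine insists on (in particular if_add_proto/if_del_proto must be
 * set, see the ENODEV check above).
 *
 *	struct ifnet_init_params init;
 *	ifnet_t ifp;
 *
 *	bzero(&init, sizeof (init));
 *	init.name = "myif";
 *	init.unit = 0;
 *	init.family = IFNET_FAMILY_ETHERNET;
 *	init.type = IFT_ETHER;
 *	init.output = my_output;
 *	init.demux = my_demux;
 *	init.add_proto = my_add_proto;
 *	init.del_proto = my_del_proto;
 *
 *	if (ifnet_allocate(&init, &ifp) == 0)
 *		(void) ifnet_attach(ifp, my_lladdr);
 */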
6653
6654 /*
6655 * Prepare the storage for the first/permanent link address, which
6656 * must have the same lifetime as the ifnet itself. Although the link
6657 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
6658 * its location in memory must never change as it may still be referred
6659 * to by some parts of the system afterwards (unfortunate implementation
6660 * artifacts inherited from BSD.)
6661 *
6662 * Caller must hold ifnet lock as writer.
6663 */
6664 static struct ifaddr *
6665 dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
6666 {
6667 struct ifaddr *ifa, *oifa;
6668 struct sockaddr_dl *asdl, *msdl;
6669 char workbuf[IFNAMSIZ * 2];
6670 int namelen, masklen, socksize;
6671 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6672
6673 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
6674 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
6675
6676 namelen = snprintf(workbuf, sizeof(workbuf), "%s",
6677 if_name(ifp));
6678 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
6679 + ((namelen > 0) ? namelen : 0);
6680 socksize = masklen + ifp->if_addrlen;
6681 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
6682 if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
6683 socksize = sizeof(struct sockaddr_dl);
6684 }
6685 socksize = ROUNDUP(socksize);
6686 #undef ROUNDUP
6687
6688 ifa = ifp->if_lladdr;
6689 if (socksize > DLIL_SDLMAXLEN ||
6690 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
6691 /*
6692 * Rare, but in the event that the link address requires
6693 * more storage space than DLIL_SDLMAXLEN, allocate the
6694 * largest possible storage for address and mask, such
6695 * that we can reuse the same space when if_addrlen grows.
6696 * This same space will be used when if_addrlen shrinks.
6697 */
6698 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
6699 int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;
6700 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
6701 if (ifa == NULL) {
6702 return NULL;
6703 }
6704 ifa_lock_init(ifa);
6705 /* Don't set IFD_ALLOC, as this is permanent */
6706 ifa->ifa_debug = IFD_LINK;
6707 }
6708 IFA_LOCK(ifa);
6709 /* address and mask sockaddr_dl locations */
6710 asdl = (struct sockaddr_dl *)(ifa + 1);
6711 bzero(asdl, SOCK_MAXADDRLEN);
6712 msdl = (struct sockaddr_dl *)(void *)
6713 ((char *)asdl + SOCK_MAXADDRLEN);
6714 bzero(msdl, SOCK_MAXADDRLEN);
6715 } else {
6716 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
6717 /*
6718 * Use the storage areas for address and mask within the
6719 * dlil_ifnet structure. This is the most common case.
6720 */
6721 if (ifa == NULL) {
6722 ifa = &dl_if->dl_if_lladdr.ifa;
6723 ifa_lock_init(ifa);
6724 /* Don't set IFD_ALLOC, as this is permanent */
6725 ifa->ifa_debug = IFD_LINK;
6726 }
6727 IFA_LOCK(ifa);
6728 /* address and mask sockaddr_dl locations */
6729 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
6730 bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
6731 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
6732 bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
6733 }
6734
6735 /* hold a permanent reference for the ifnet itself */
6736 IFA_ADDREF_LOCKED(ifa);
6737 oifa = ifp->if_lladdr;
6738 ifp->if_lladdr = ifa;
6739
6740 VERIFY(ifa->ifa_debug == IFD_LINK);
6741 ifa->ifa_ifp = ifp;
6742 ifa->ifa_rtrequest = link_rtrequest;
6743 ifa->ifa_addr = (struct sockaddr *)asdl;
6744 asdl->sdl_len = socksize;
6745 asdl->sdl_family = AF_LINK;
6746 if (namelen > 0) {
6747 bcopy(workbuf, asdl->sdl_data, min(namelen,
6748 sizeof(asdl->sdl_data)));
6749 asdl->sdl_nlen = namelen;
6750 } else {
6751 asdl->sdl_nlen = 0;
6752 }
6753 asdl->sdl_index = ifp->if_index;
6754 asdl->sdl_type = ifp->if_type;
6755 if (ll_addr != NULL) {
6756 asdl->sdl_alen = ll_addr->sdl_alen;
6757 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
6758 } else {
6759 asdl->sdl_alen = 0;
6760 }
6761 ifa->ifa_netmask = (struct sockaddr *)msdl;
6762 msdl->sdl_len = masklen;
6763 while (namelen > 0) {
6764 msdl->sdl_data[--namelen] = 0xff;
6765 }
6766 IFA_UNLOCK(ifa);
6767
6768 if (oifa != NULL) {
6769 IFA_REMREF(oifa);
6770 }
6771
6772 return ifa;
6773 }
6774
6775 static void
6776 if_purgeaddrs(struct ifnet *ifp)
6777 {
6778 #if INET
6779 in_purgeaddrs(ifp);
6780 #endif /* INET */
6781 #if INET6
6782 in6_purgeaddrs(ifp);
6783 #endif /* INET6 */
6784 }
6785
6786 errno_t
6787 ifnet_detach(ifnet_t ifp)
6788 {
6789 struct ifnet *delegated_ifp;
6790 struct nd_ifinfo *ndi = NULL;
6791
6792 if (ifp == NULL) {
6793 return EINVAL;
6794 }
6795
6796 ndi = ND_IFINFO(ifp);
6797 if (NULL != ndi) {
6798 ndi->cga_initialized = FALSE;
6799 }
6800
6801 lck_mtx_lock(rnh_lock);
6802 ifnet_head_lock_exclusive();
6803 ifnet_lock_exclusive(ifp);
6804
6805 /*
6806 * Check to see if this interface has previously triggered
6807 * aggressive protocol draining; if so, decrement the global
6808 * refcnt and clear PR_AGGDRAIN on the route domain if
6809 * there are no more of such an interface around.
6810 */
6811 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
6812
6813 lck_mtx_lock_spin(&ifp->if_ref_lock);
6814 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
6815 lck_mtx_unlock(&ifp->if_ref_lock);
6816 ifnet_lock_done(ifp);
6817 ifnet_head_done();
6818 lck_mtx_unlock(rnh_lock);
6819 return EINVAL;
6820 } else if (ifp->if_refflags & IFRF_DETACHING) {
6821 /* Interface has already been detached */
6822 lck_mtx_unlock(&ifp->if_ref_lock);
6823 ifnet_lock_done(ifp);
6824 ifnet_head_done();
6825 lck_mtx_unlock(rnh_lock);
6826 return ENXIO;
6827 }
6828 VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
6829 /* Indicate this interface is being detached */
6830 ifp->if_refflags &= ~IFRF_ATTACHED;
6831 ifp->if_refflags |= IFRF_DETACHING;
6832 lck_mtx_unlock(&ifp->if_ref_lock);
6833
6834 if (dlil_verbose) {
6835 printf("%s: detaching\n", if_name(ifp));
6836 }
6837
6838 /* clean up flow control entry object if there's any */
6839 if (ifp->if_eflags & IFEF_TXSTART) {
6840 ifnet_flowadv(ifp->if_flowhash);
6841 }
6842
6843 /* Reset ECN enable/disable flags */
6844 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
6845 ifp->if_eflags &= ~IFEF_ECN_ENABLE;
6846
6847 /* Reset CLAT46 flag */
6848 ifp->if_eflags &= ~IFEF_CLAT46;
6849
6850 /*
6851 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
6852 * no longer be visible during lookups from this point.
6853 */
6854 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
6855 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
6856 ifp->if_link.tqe_next = NULL;
6857 ifp->if_link.tqe_prev = NULL;
6858 if (ifp->if_ordered_link.tqe_next != NULL ||
6859 ifp->if_ordered_link.tqe_prev != NULL) {
6860 ifnet_remove_from_ordered_list(ifp);
6861 }
6862 ifindex2ifnet[ifp->if_index] = NULL;
6863
6864 /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
6865 ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
6866
6867 /* Record detach PC stacktrace */
6868 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
6869
6870 /* Clear logging parameters */
6871 bzero(&ifp->if_log, sizeof(ifp->if_log));
6872
6873 /* Clear delegated interface info (reference released below) */
6874 delegated_ifp = ifp->if_delegated.ifp;
6875 bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));
6876
6877 /* Reset interface state */
6878 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
6879
6880 ifnet_lock_done(ifp);
6881 ifnet_head_done();
6882 lck_mtx_unlock(rnh_lock);
6883
6884
6885 /* Release reference held on the delegated interface */
6886 if (delegated_ifp != NULL) {
6887 ifnet_release(delegated_ifp);
6888 }
6889
6890 /* Reset Link Quality Metric (unless loopback [lo0]) */
6891 if (ifp != lo_ifp) {
6892 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
6893 }
6894
6895 /* Reset TCP local statistics */
6896 if (ifp->if_tcp_stat != NULL) {
6897 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
6898 }
6899
6900 /* Reset UDP local statistics */
6901 if (ifp->if_udp_stat != NULL) {
6902 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
6903 }
6904
6905 /* Reset ifnet IPv4 stats */
6906 if (ifp->if_ipv4_stat != NULL) {
6907 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
6908 }
6909
6910 /* Reset ifnet IPv6 stats */
6911 if (ifp->if_ipv6_stat != NULL) {
6912 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
6913 }
6914
6915 /* Release memory held for interface link status report */
6916 if (ifp->if_link_status != NULL) {
6917 FREE(ifp->if_link_status, M_TEMP);
6918 ifp->if_link_status = NULL;
6919 }
6920
6921 /* Clear agent IDs */
6922 if (ifp->if_agentids != NULL) {
6923 FREE(ifp->if_agentids, M_NETAGENT);
6924 ifp->if_agentids = NULL;
6925 }
6926 ifp->if_agentcount = 0;
6927
6928
6929 /* Let BPF know we're detaching */
6930 bpfdetach(ifp);
6931
6932 /* Mark the interface as DOWN */
6933 if_down(ifp);
6934
6935 /* Disable forwarding cached route */
6936 lck_mtx_lock(&ifp->if_cached_route_lock);
6937 ifp->if_fwd_cacheok = 0;
6938 lck_mtx_unlock(&ifp->if_cached_route_lock);
6939
6940 /* Disable data threshold and wait for any pending event posting */
6941 ifp->if_data_threshold = 0;
6942 VERIFY(ifp->if_dt_tcall != NULL);
6943 (void) thread_call_cancel_wait(ifp->if_dt_tcall);
6944
6945 /*
6946 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
6947 * references to the info structures and leave them attached to
6948 * this ifnet.
6949 */
6950 #if INET
6951 igmp_domifdetach(ifp);
6952 #endif /* INET */
6953 #if INET6
6954 mld_domifdetach(ifp);
6955 #endif /* INET6 */
6956
6957 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
6958
6959 /* Let worker thread take care of the rest, to avoid reentrancy */
6960 dlil_if_lock();
6961 ifnet_detaching_enqueue(ifp);
6962 dlil_if_unlock();
6963
6964 return 0;
6965 }
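/*
 * Editor's note -- illustrative, not part of the build: ifnet_detach()
 * only queues the interface for the detacher thread; the heavy lifting
 * happens later in ifnet_detach_final() once all outstanding IO
 * references have been dropped, and the driver's if_free callback (if
 * any) is invoked at the very end.  A driver therefore would normally
 * not release its private state until that callback fires, e.g. with a
 * hypothetical callback name:
 *
 *	(void) ifnet_detach(ifp);
 *	... wait until my_if_free(ifp) has been called ...
 *	... only then release driver-private state ...
 */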
6966
6967 static void
6968 ifnet_detaching_enqueue(struct ifnet *ifp)
6969 {
6970 dlil_if_lock_assert();
6971
6972 ++ifnet_detaching_cnt;
6973 VERIFY(ifnet_detaching_cnt != 0);
6974 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
6975 wakeup((caddr_t)&ifnet_delayed_run);
6976 }
6977
6978 static struct ifnet *
6979 ifnet_detaching_dequeue(void)
6980 {
6981 struct ifnet *ifp;
6982
6983 dlil_if_lock_assert();
6984
6985 ifp = TAILQ_FIRST(&ifnet_detaching_head);
6986 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
6987 if (ifp != NULL) {
6988 VERIFY(ifnet_detaching_cnt != 0);
6989 --ifnet_detaching_cnt;
6990 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
6991 ifp->if_detaching_link.tqe_next = NULL;
6992 ifp->if_detaching_link.tqe_prev = NULL;
6993 }
6994 return ifp;
6995 }
6996
6997 static int
6998 ifnet_detacher_thread_cont(int err)
6999 {
7000 #pragma unused(err)
7001 struct ifnet *ifp;
7002
7003 for (;;) {
7004 dlil_if_lock_assert();
7005 while (ifnet_detaching_cnt == 0) {
7006 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
7007 (PZERO - 1), "ifnet_detacher_cont", 0,
7008 ifnet_detacher_thread_cont);
7009 /* NOTREACHED */
7010 }
7011
7012 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
7013
7014 /* Take care of detaching ifnet */
7015 ifp = ifnet_detaching_dequeue();
7016 if (ifp != NULL) {
7017 dlil_if_unlock();
7018 ifnet_detach_final(ifp);
7019 dlil_if_lock();
7020 }
7021 }
7022 }
7023
7024 static void
7025 ifnet_detacher_thread_func(void *v, wait_result_t w)
7026 {
7027 #pragma unused(v, w)
7028 dlil_if_lock();
7029 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
7030 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
7031 /*
7032 * msleep0() shouldn't have returned as PCATCH was not set;
7033 * therefore assert in this case.
7034 */
7035 dlil_if_unlock();
7036 VERIFY(0);
7037 }
7038
7039 static void
7040 ifnet_detach_final(struct ifnet *ifp)
7041 {
7042 struct ifnet_filter *filter, *filter_next;
7043 struct ifnet_filter_head fhead;
7044 struct dlil_threading_info *inp;
7045 struct ifaddr *ifa;
7046 ifnet_detached_func if_free;
7047 int i;
7048
7049 lck_mtx_lock(&ifp->if_ref_lock);
7050 if (!(ifp->if_refflags & IFRF_DETACHING)) {
7051 panic("%s: flags mismatch (detaching not set) ifp=%p",
7052 __func__, ifp);
7053 /* NOTREACHED */
7054 }
7055
7056 /*
7057 * Wait until the existing IO references get released
7058 * before we proceed with ifnet_detach. This is not a
7059 * common case, so block without using a continuation.
7060 */
7061 while (ifp->if_refio > 0) {
7062 printf("%s: Waiting for IO references on %s interface "
7063 "to be released\n", __func__, if_name(ifp));
7064 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
7065 (PZERO - 1), "ifnet_ioref_wait", NULL);
7066 }
7067 lck_mtx_unlock(&ifp->if_ref_lock);
7068
7069 /* Drain and destroy send queue */
7070 ifclassq_teardown(ifp);
7071
7072 /* Detach interface filters */
7073 lck_mtx_lock(&ifp->if_flt_lock);
7074 if_flt_monitor_enter(ifp);
7075
7076 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
7077 fhead = ifp->if_flt_head;
7078 TAILQ_INIT(&ifp->if_flt_head);
7079
7080 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
7081 filter_next = TAILQ_NEXT(filter, filt_next);
7082 lck_mtx_unlock(&ifp->if_flt_lock);
7083
7084 dlil_detach_filter_internal(filter, 1);
7085 lck_mtx_lock(&ifp->if_flt_lock);
7086 }
7087 if_flt_monitor_leave(ifp);
7088 lck_mtx_unlock(&ifp->if_flt_lock);
7089
7090 /* Tell upper layers to drop their network addresses */
7091 if_purgeaddrs(ifp);
7092
7093 ifnet_lock_exclusive(ifp);
7094
7095 /* Unplumb all protocols */
7096 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
7097 struct if_proto *proto;
7098
7099 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7100 while (proto != NULL) {
7101 protocol_family_t family = proto->protocol_family;
7102 ifnet_lock_done(ifp);
7103 proto_unplumb(family, ifp);
7104 ifnet_lock_exclusive(ifp);
7105 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
7106 }
7107 /* There should not be any protocols left */
7108 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
7109 }
7110 zfree(dlif_phash_zone, ifp->if_proto_hash);
7111 ifp->if_proto_hash = NULL;
7112
7113 /* Detach (permanent) link address from if_addrhead */
7114 ifa = TAILQ_FIRST(&ifp->if_addrhead);
7115 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
7116 IFA_LOCK(ifa);
7117 if_detach_link_ifa(ifp, ifa);
7118 IFA_UNLOCK(ifa);
7119
7120 /* Remove (permanent) link address from ifnet_addrs[] */
7121 IFA_REMREF(ifa);
7122 ifnet_addrs[ifp->if_index - 1] = NULL;
7123
7124 /* This interface should not be on {ifnet_head,detaching} */
7125 VERIFY(ifp->if_link.tqe_next == NULL);
7126 VERIFY(ifp->if_link.tqe_prev == NULL);
7127 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
7128 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
7129 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
7130 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
7131
7132 /* The slot should have been emptied */
7133 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
7134
7135 /* There should not be any addresses left */
7136 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
7137
7138 /*
7139 * Signal the starter thread to terminate itself.
7140 */
7141 if (ifp->if_start_thread != THREAD_NULL) {
7142 lck_mtx_lock_spin(&ifp->if_start_lock);
7143 ifp->if_start_flags = 0;
7144 ifp->if_start_thread = THREAD_NULL;
7145 wakeup_one((caddr_t)&ifp->if_start_thread);
7146 lck_mtx_unlock(&ifp->if_start_lock);
7147 }
7148
7149 /*
7150 * Signal the poller thread to terminate itself.
7151 */
7152 if (ifp->if_poll_thread != THREAD_NULL) {
7153 lck_mtx_lock_spin(&ifp->if_poll_lock);
7154 ifp->if_poll_thread = THREAD_NULL;
7155 wakeup_one((caddr_t)&ifp->if_poll_thread);
7156 lck_mtx_unlock(&ifp->if_poll_lock);
7157 }
7158
7159 /*
7160 * If thread affinity was set for the workloop thread, we will need
7161 * to tear down the affinity and release the extra reference count
7162 * taken at attach time. Does not apply to lo0 or other interfaces
7163 * without dedicated input threads.
7164 */
7165 if ((inp = ifp->if_inp) != NULL) {
7166 VERIFY(inp != dlil_main_input_thread);
7167
7168 if (inp->net_affinity) {
7169 struct thread *tp, *wtp, *ptp;
7170
7171 lck_mtx_lock_spin(&inp->input_lck);
7172 wtp = inp->wloop_thr;
7173 inp->wloop_thr = THREAD_NULL;
7174 ptp = inp->poll_thr;
7175 inp->poll_thr = THREAD_NULL;
7176 tp = inp->input_thr; /* don't nullify now */
7177 inp->tag = 0;
7178 inp->net_affinity = FALSE;
7179 lck_mtx_unlock(&inp->input_lck);
7180
7181 /* Tear down poll thread affinity */
7182 if (ptp != NULL) {
7183 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
7184 (void) dlil_affinity_set(ptp,
7185 THREAD_AFFINITY_TAG_NULL);
7186 thread_deallocate(ptp);
7187 }
7188
7189 /* Tear down workloop thread affinity */
7190 if (wtp != NULL) {
7191 (void) dlil_affinity_set(wtp,
7192 THREAD_AFFINITY_TAG_NULL);
7193 thread_deallocate(wtp);
7194 }
7195
7196 /* Tear down DLIL input thread affinity */
7197 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
7198 thread_deallocate(tp);
7199 }
7200
7201 /* disassociate ifp DLIL input thread */
7202 ifp->if_inp = NULL;
7203
7204 /* tell the input thread to terminate */
7205 lck_mtx_lock_spin(&inp->input_lck);
7206 inp->input_waiting |= DLIL_INPUT_TERMINATE;
7207 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
7208 wakeup_one((caddr_t)&inp->input_waiting);
7209 }
7210 lck_mtx_unlock(&inp->input_lck);
7211 ifnet_lock_done(ifp);
7212
7213 /* wait for the input thread to terminate */
7214 lck_mtx_lock_spin(&inp->input_lck);
7215 while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
7216 == 0) {
7217 (void) msleep(&inp->input_waiting, &inp->input_lck,
7218 (PZERO - 1) | PSPIN, inp->input_name, NULL);
7219 }
7220 lck_mtx_unlock(&inp->input_lck);
7221 ifnet_lock_exclusive(ifp);
7222
7223 /* clean-up input thread state */
7224 dlil_clean_threading_info(inp);
7225 }
7226
7227 /* The driver might unload, so point these to ourselves */
7228 if_free = ifp->if_free;
7229 ifp->if_output_dlil = ifp_if_output;
7230 ifp->if_output = ifp_if_output;
7231 ifp->if_pre_enqueue = ifp_if_output;
7232 ifp->if_start = ifp_if_start;
7233 ifp->if_output_ctl = ifp_if_ctl;
7234 ifp->if_input_dlil = ifp_if_input;
7235 ifp->if_input_poll = ifp_if_input_poll;
7236 ifp->if_input_ctl = ifp_if_ctl;
7237 ifp->if_ioctl = ifp_if_ioctl;
7238 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
7239 ifp->if_free = ifp_if_free;
7240 ifp->if_demux = ifp_if_demux;
7241 ifp->if_event = ifp_if_event;
7242 ifp->if_framer_legacy = ifp_if_framer;
7243 ifp->if_framer = ifp_if_framer_extended;
7244 ifp->if_add_proto = ifp_if_add_proto;
7245 ifp->if_del_proto = ifp_if_del_proto;
7246 ifp->if_check_multi = ifp_if_check_multi;
7247
7248 /* wipe out interface description */
7249 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
7250 ifp->if_desc.ifd_len = 0;
7251 VERIFY(ifp->if_desc.ifd_desc != NULL);
7252 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
7253
7254 /* there shouldn't be any delegation by now */
7255 VERIFY(ifp->if_delegated.ifp == NULL);
7256 VERIFY(ifp->if_delegated.type == 0);
7257 VERIFY(ifp->if_delegated.family == 0);
7258 VERIFY(ifp->if_delegated.subfamily == 0);
7259 VERIFY(ifp->if_delegated.expensive == 0);
7260
7261 /* QoS marking gets cleared */
7262 ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
7263 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
7264
7265
7266 ifnet_lock_done(ifp);
7267
7268 #if PF
7269 /*
7270 * Detach this interface from packet filter, if enabled.
7271 */
7272 pf_ifnet_hook(ifp, 0);
7273 #endif /* PF */
7274
7275 /* Filter list should be empty */
7276 lck_mtx_lock_spin(&ifp->if_flt_lock);
7277 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
7278 VERIFY(ifp->if_flt_busy == 0);
7279 VERIFY(ifp->if_flt_waiters == 0);
7280 lck_mtx_unlock(&ifp->if_flt_lock);
7281
7282 /* Last chance to drain send queue */
7283 if_qflush(ifp, 0);
7284
7285 /* Last chance to cleanup any cached route */
7286 lck_mtx_lock(&ifp->if_cached_route_lock);
7287 VERIFY(!ifp->if_fwd_cacheok);
7288 ROUTE_RELEASE(&ifp->if_fwd_route);
7289 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
7290 ROUTE_RELEASE(&ifp->if_src_route);
7291 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
7292 ROUTE_RELEASE(&ifp->if_src_route6);
7293 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
7294 lck_mtx_unlock(&ifp->if_cached_route_lock);
7295
7296 VERIFY(ifp->if_data_threshold == 0);
7297 VERIFY(ifp->if_dt_tcall != NULL);
7298 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
7299
7300 ifnet_llreach_ifdetach(ifp);
7301
7302 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
7303
7304 /*
7305 * Finally, mark this ifnet as detached.
7306 */
7307 lck_mtx_lock_spin(&ifp->if_ref_lock);
7308 if (!(ifp->if_refflags & IFRF_DETACHING)) {
7309 panic("%s: flags mismatch (detaching not set) ifp=%p",
7310 __func__, ifp);
7311 /* NOTREACHED */
7312 }
7313 ifp->if_refflags &= ~IFRF_DETACHING;
7314 lck_mtx_unlock(&ifp->if_ref_lock);
7315 if (if_free != NULL) {
7316 if_free(ifp);
7317 }
7318
7319 if (dlil_verbose) {
7320 printf("%s: detached\n", if_name(ifp));
7321 }
7322
7323 /* Release reference held during ifnet attach */
7324 ifnet_release(ifp);
7325 }
7326
7327 errno_t
7328 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
7329 {
7330 #pragma unused(ifp)
7331 m_freem_list(m);
7332 return 0;
7333 }
7334
7335 void
7336 ifp_if_start(struct ifnet *ifp)
7337 {
7338 ifnet_purge(ifp);
7339 }
7340
7341 static errno_t
7342 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
7343 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
7344 boolean_t poll, struct thread *tp)
7345 {
7346 #pragma unused(ifp, m_tail, s, poll, tp)
7347 m_freem_list(m_head);
7348 return ENXIO;
7349 }
7350
7351 static void
7352 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
7353 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
7354 {
7355 #pragma unused(ifp, flags, max_cnt)
7356 if (m_head != NULL) {
7357 *m_head = NULL;
7358 }
7359 if (m_tail != NULL) {
7360 *m_tail = NULL;
7361 }
7362 if (cnt != NULL) {
7363 *cnt = 0;
7364 }
7365 if (len != NULL) {
7366 *len = 0;
7367 }
7368 }
7369
7370 static errno_t
7371 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
7372 {
7373 #pragma unused(ifp, cmd, arglen, arg)
7374 return EOPNOTSUPP;
7375 }
7376
7377 static errno_t
7378 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
7379 {
7380 #pragma unused(ifp, fh, pf)
7381 m_freem(m);
7382 return EJUSTRETURN;
7383 }
7384
7385 static errno_t
7386 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
7387 const struct ifnet_demux_desc *da, u_int32_t dc)
7388 {
7389 #pragma unused(ifp, pf, da, dc)
7390 return EINVAL;
7391 }
7392
7393 static errno_t
7394 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
7395 {
7396 #pragma unused(ifp, pf)
7397 return EINVAL;
7398 }
7399
7400 static errno_t
7401 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
7402 {
7403 #pragma unused(ifp, sa)
7404 return EOPNOTSUPP;
7405 }
7406
7407 #if CONFIG_EMBEDDED
7408 static errno_t
7409 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
7410 const struct sockaddr *sa, const char *ll, const char *t,
7411 u_int32_t *pre, u_int32_t *post)
7412 #else
7413 static errno_t
7414 ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
7415 const struct sockaddr *sa, const char *ll, const char *t)
7416 #endif /* !CONFIG_EMBEDDED */
7417 {
7418 #pragma unused(ifp, m, sa, ll, t)
7419 #if CONFIG_EMBEDDED
7420 return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
7421 #else
7422 return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
7423 #endif /* !CONFIG_EMBEDDED */
7424 }
7425
7426 static errno_t
7427 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
7428 const struct sockaddr *sa, const char *ll, const char *t,
7429 u_int32_t *pre, u_int32_t *post)
7430 {
7431 #pragma unused(ifp, sa, ll, t)
7432 m_freem(*m);
7433 *m = NULL;
7434
7435 if (pre != NULL) {
7436 *pre = 0;
7437 }
7438 if (post != NULL) {
7439 *post = 0;
7440 }
7441
7442 return EJUSTRETURN;
7443 }
7444
7445 errno_t
7446 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
7447 {
7448 #pragma unused(ifp, cmd, arg)
7449 return EOPNOTSUPP;
7450 }
7451
7452 static errno_t
7453 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
7454 {
7455 #pragma unused(ifp, tm, f)
7456 /* XXX not sure what to do here */
7457 return 0;
7458 }
7459
7460 static void
7461 ifp_if_free(struct ifnet *ifp)
7462 {
7463 #pragma unused(ifp)
7464 }
7465
7466 static void
7467 ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
7468 {
7469 #pragma unused(ifp, e)
7470 }
7471
7472 int
7473 dlil_if_acquire(u_int32_t family, const void *uniqueid,
7474 size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
7475 {
7476 struct ifnet *ifp1 = NULL;
7477 struct dlil_ifnet *dlifp1 = NULL;
7478 void *buf, *base, **pbuf;
7479 int ret = 0;
7480
7481 VERIFY(*ifp == NULL);
7482 dlil_if_lock();
7483 /*
7484 * We absolutely can't have an interface with the same name
7485 * in the in-use state.
7486 * To make sure of that, the list has to be traversed completely.
7487 */
7488 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
7489 ifp1 = (struct ifnet *)dlifp1;
7490
7491 if (ifp1->if_family != family) {
7492 continue;
7493 }
7494
7495 /*
7496 * If the interface is in use, return EBUSY if either the unique id
7497 * or the interface extended name is the same
7498 */
7499 lck_mtx_lock(&dlifp1->dl_if_lock);
7500 if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
7501 if (dlifp1->dl_if_flags & DLIF_INUSE) {
7502 lck_mtx_unlock(&dlifp1->dl_if_lock);
7503 ret = EBUSY;
7504 goto end;
7505 }
7506 }
7507
7508 if (uniqueid_len) {
7509 if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
7510 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
7511 if (dlifp1->dl_if_flags & DLIF_INUSE) {
7512 lck_mtx_unlock(&dlifp1->dl_if_lock);
7513 ret = EBUSY;
7514 goto end;
7515 } else {
7516 dlifp1->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
7517 /* Cache the first interface that can be recycled */
7518 if (*ifp == NULL) {
7519 *ifp = ifp1;
7520 }
7521 /*
7522 * XXX Do not break or jump to end as we have to traverse
7523 * the whole list to ensure there are no name collisions
7524 */
7525 }
7526 }
7527 }
7528 lck_mtx_unlock(&dlifp1->dl_if_lock);
7529 }
7530
7531 /* If there's an interface that can be recycled, use that */
7532 if (*ifp != NULL) {
7533 goto end;
7534 }
7535
7536 /* no interface found, allocate a new one */
7537 buf = zalloc(dlif_zone);
7538 if (buf == NULL) {
7539 ret = ENOMEM;
7540 goto end;
7541 }
7542 bzero(buf, dlif_bufsize);
7543
7544 /* Get the 64-bit aligned base address for this object */
7545 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
7546 sizeof(u_int64_t));
7547 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
7548
7549 /*
7550 * Wind back a pointer size from the aligned base and
7551 * save the original address so we can free it later.
7552 */
7553 pbuf = (void **)((intptr_t)base - sizeof(void *));
7554 *pbuf = buf;
7555 dlifp1 = base;
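/*
 * Resulting layout of the zalloc'd buffer (illustrative sketch; the
 * leading padding depends on the address zalloc happened to return):
 *
 *	buf                 pbuf          base (64-bit aligned)
 *	 |                   |             |
 *	 +----- padding -----+-- (*pbuf) --+-- struct dlil_ifnet --...
 *	                         = buf         (dlif_size bytes)
 *
 * The free path can always recover the original zalloc pointer through
 * *pbuf, no matter how much padding the alignment required.
 */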
7556
7557 if (uniqueid_len) {
7558 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
7559 M_NKE, M_WAITOK);
7560 if (dlifp1->dl_if_uniqueid == NULL) {
7561 zfree(dlif_zone, buf);
7562 ret = ENOMEM;
7563 goto end;
7564 }
7565 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
7566 dlifp1->dl_if_uniqueid_len = uniqueid_len;
7567 }
7568
7569 ifp1 = (struct ifnet *)dlifp1;
7570 dlifp1->dl_if_flags = DLIF_INUSE;
7571 if (ifnet_debug) {
7572 dlifp1->dl_if_flags |= DLIF_DEBUG;
7573 dlifp1->dl_if_trace = dlil_if_trace;
7574 }
7575 ifp1->if_name = dlifp1->dl_if_namestorage;
7576 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
7577
7578 /* initialize interface description */
7579 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
7580 ifp1->if_desc.ifd_len = 0;
7581 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
7582
7583
7584 #if CONFIG_MACF_NET
7585 mac_ifnet_label_init(ifp1);
7586 #endif
7587
7588 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
7589 DLIL_PRINTF("%s: failed to allocate if local stats, "
7590 "error: %d\n", __func__, ret);
7591 /* This probably shouldn't be fatal */
7592 ret = 0;
7593 }
7594
7595 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
7596 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
7597 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
7598 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
7599 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
7600 ifnet_lock_attr);
7601 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
7602 #if INET
7603 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
7604 ifnet_lock_attr);
7605 ifp1->if_inetdata = NULL;
7606 #endif
7607 #if INET6
7608 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
7609 ifnet_lock_attr);
7610 ifp1->if_inet6data = NULL;
7611 #endif
7612 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
7613 ifnet_lock_attr);
7614 ifp1->if_link_status = NULL;
7615
7616 /* for send data paths */
7617 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
7618 ifnet_lock_attr);
7619 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
7620 ifnet_lock_attr);
7621 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
7622 ifnet_lock_attr);
7623
7624 /* for receive data paths */
7625 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
7626 ifnet_lock_attr);
7627
7628 /* thread call allocation is done with sleeping zalloc */
7629 ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
7630 ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
7631 if (ifp1->if_dt_tcall == NULL) {
7632 panic_plain("%s: couldn't create if_dt_tcall", __func__);
7633 /* NOTREACHED */
7634 }
7635
7636 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
7637
7638 *ifp = ifp1;
7639
7640 end:
7641 dlil_if_unlock();
7642
7643 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
7644 IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));
7645
7646 return ret;
7647 }
7648
7649 __private_extern__ void
7650 dlil_if_release(ifnet_t ifp)
7651 {
7652 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
7653
7654 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
7655 if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
7656 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
7657 }
7658
7659 ifnet_lock_exclusive(ifp);
7660 lck_mtx_lock(&dlifp->dl_if_lock);
7661 dlifp->dl_if_flags &= ~DLIF_INUSE;
7662 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
7663 ifp->if_name = dlifp->dl_if_namestorage;
7664 /* Reset external name (name + unit) */
7665 ifp->if_xname = dlifp->dl_if_xnamestorage;
7666 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
7667 "%s?", ifp->if_name);
7668 lck_mtx_unlock(&dlifp->dl_if_lock);
7669 #if CONFIG_MACF_NET
7670 /*
7671 * We can either recycle the MAC label here or in dlil_if_acquire().
7672 * It seems logical to do it here but this means that anything that
7673 * still has a handle on ifp will now see it as unlabeled.
7674 * Since the interface is "dead" that may be OK. Revisit later.
7675 */
7676 mac_ifnet_label_recycle(ifp);
7677 #endif
7678 ifnet_lock_done(ifp);
7679 }
7680
7681 __private_extern__ void
7682 dlil_if_lock(void)
7683 {
7684 lck_mtx_lock(&dlil_ifnet_lock);
7685 }
7686
7687 __private_extern__ void
7688 dlil_if_unlock(void)
7689 {
7690 lck_mtx_unlock(&dlil_ifnet_lock);
7691 }
7692
7693 __private_extern__ void
7694 dlil_if_lock_assert(void)
7695 {
7696 LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
7697 }
7698
7699 __private_extern__ void
7700 dlil_proto_unplumb_all(struct ifnet *ifp)
7701 {
7702 /*
7703 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
7704 * each bucket contains exactly one entry; PF_VLAN does not need an
7705 * explicit unplumb.
7706 *
7707 * if_proto_hash[3] is for other protocols; we expect anything
7708 * in this bucket to respond to the DETACHING event (which would
7709 * have happened by now) and do the unplumb then.
7710 */
7711 (void) proto_unplumb(PF_INET, ifp);
7712 #if INET6
7713 (void) proto_unplumb(PF_INET6, ifp);
7714 #endif /* INET6 */
7715 }
7716
7717 static void
7718 ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
7719 {
7720 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7721 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7722
7723 route_copyout(dst, &ifp->if_src_route, sizeof(*dst));
7724
7725 lck_mtx_unlock(&ifp->if_cached_route_lock);
7726 }
7727
7728 static void
7729 ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
7730 {
7731 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7732 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7733
7734 if (ifp->if_fwd_cacheok) {
7735 route_copyin(src, &ifp->if_src_route, sizeof(*src));
7736 } else {
7737 ROUTE_RELEASE(src);
7738 }
7739 lck_mtx_unlock(&ifp->if_cached_route_lock);
7740 }
7741
7742 #if INET6
7743 static void
7744 ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
7745 {
7746 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7747 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7748
7749 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
7750 sizeof(*dst));
7751
7752 lck_mtx_unlock(&ifp->if_cached_route_lock);
7753 }
7754
7755 static void
7756 ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
7757 {
7758 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7759 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7760
7761 if (ifp->if_fwd_cacheok) {
7762 route_copyin((struct route *)src,
7763 (struct route *)&ifp->if_src_route6, sizeof(*src));
7764 } else {
7765 ROUTE_RELEASE(src);
7766 }
7767 lck_mtx_unlock(&ifp->if_cached_route_lock);
7768 }
7769 #endif /* INET6 */
7770
7771 struct rtentry *
7772 ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
7773 {
7774 struct route src_rt;
7775 struct sockaddr_in *dst;
7776
7777 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
7778
7779 ifp_src_route_copyout(ifp, &src_rt);
7780
7781 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
7782 ROUTE_RELEASE(&src_rt);
7783 if (dst->sin_family != AF_INET) {
7784 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
7785 dst->sin_len = sizeof(src_rt.ro_dst);
7786 dst->sin_family = AF_INET;
7787 }
7788 dst->sin_addr = src_ip;
7789
7790 VERIFY(src_rt.ro_rt == NULL);
7791 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
7792 0, 0, ifp->if_index);
7793
7794 if (src_rt.ro_rt != NULL) {
7795 /* retain a ref, copyin consumes one */
7796 struct rtentry *rte = src_rt.ro_rt;
7797 RT_ADDREF(rte);
7798 ifp_src_route_copyin(ifp, &src_rt);
7799 src_rt.ro_rt = rte;
7800 }
7801 }
7802
7803 return src_rt.ro_rt;
7804 }
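/*
 * Illustrative caller sketch (not taken from this file): the rtentry
 * returned above, when non-NULL, carries a reference that the caller is
 * expected to drop once it is done with the route, e.g.:
 *
 *	struct rtentry *rt;
 *
 *	rt = ifnet_cached_rtlookup_inet(ifp, ip->ip_src);
 *	if (rt != NULL) {
 *		... use rt (rt->rt_ifp, rt->rt_gateway, ...) ...
 *		rtfree(rt);	// drop the reference taken above
 *	}
 *
 * The per-ifnet cache keeps its own reference via the copyin path, so
 * the caller's release does not invalidate the cached entry.
 */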
7805
7806 #if INET6
7807 struct rtentry *
7808 ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
7809 {
7810 struct route_in6 src_rt;
7811
7812 ifp_src_route6_copyout(ifp, &src_rt);
7813
7814 if (ROUTE_UNUSABLE(&src_rt) ||
7815 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
7816 ROUTE_RELEASE(&src_rt);
7817 if (src_rt.ro_dst.sin6_family != AF_INET6) {
7818 bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
7819 src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
7820 src_rt.ro_dst.sin6_family = AF_INET6;
7821 }
7822 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
7823 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
7824 sizeof(src_rt.ro_dst.sin6_addr));
7825
7826 if (src_rt.ro_rt == NULL) {
7827 src_rt.ro_rt = rtalloc1_scoped(
7828 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
7829 ifp->if_index);
7830
7831 if (src_rt.ro_rt != NULL) {
7832 /* retain a ref, copyin consumes one */
7833 struct rtentry *rte = src_rt.ro_rt;
7834 RT_ADDREF(rte);
7835 ifp_src_route6_copyin(ifp, &src_rt);
7836 src_rt.ro_rt = rte;
7837 }
7838 }
7839 }
7840
7841 return src_rt.ro_rt;
7842 }
7843 #endif /* INET6 */
7844
7845 void
7846 if_lqm_update(struct ifnet *ifp, int lqm, int locked)
7847 {
7848 struct kev_dl_link_quality_metric_data ev_lqm_data;
7849
7850 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
7851
7852 /* Normalize to edge */
7853 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
7854 lqm = IFNET_LQM_THRESH_ABORT;
7855 atomic_bitset_32(&tcbinfo.ipi_flags,
7856 INPCBINFO_HANDLE_LQM_ABORT);
7857 inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
7858 } else if (lqm > IFNET_LQM_THRESH_ABORT &&
7859 lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
7860 lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
7861 } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
7862 lqm <= IFNET_LQM_THRESH_POOR) {
7863 lqm = IFNET_LQM_THRESH_POOR;
7864 } else if (lqm > IFNET_LQM_THRESH_POOR &&
7865 lqm <= IFNET_LQM_THRESH_GOOD) {
7866 lqm = IFNET_LQM_THRESH_GOOD;
7867 }
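/*
 * Worked example of the normalization above: a raw lqm falling strictly
 * between IFNET_LQM_THRESH_POOR and IFNET_LQM_THRESH_GOOD is rounded up
 * to IFNET_LQM_THRESH_GOOD; anything non-negative at or below
 * IFNET_LQM_THRESH_ABORT collapses to IFNET_LQM_THRESH_ABORT and also
 * schedules the fast inpcb timer so TCP can handle the abort condition
 * promptly.  Negative values (e.g. the unknown/off sentinels) and values
 * above IFNET_LQM_THRESH_GOOD pass through unchanged.
 */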
7868
7869 /*
7870 * Take the lock if needed
7871 */
7872 if (!locked) {
7873 ifnet_lock_exclusive(ifp);
7874 }
7875
7876 if (lqm == ifp->if_interface_state.lqm_state &&
7877 (ifp->if_interface_state.valid_bitmask &
7878 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
7879 /*
7880 * Release the lock if it was not held by the caller
7881 */
7882 if (!locked) {
7883 ifnet_lock_done(ifp);
7884 }
7885 return; /* nothing to update */
7886 }
7887 ifp->if_interface_state.valid_bitmask |=
7888 IF_INTERFACE_STATE_LQM_STATE_VALID;
7889 ifp->if_interface_state.lqm_state = lqm;
7890
7891 /*
7892 * Don't want to hold the lock when issuing kernel events
7893 */
7894 ifnet_lock_done(ifp);
7895
7896 bzero(&ev_lqm_data, sizeof(ev_lqm_data));
7897 ev_lqm_data.link_quality_metric = lqm;
7898
7899 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
7900 (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data));
7901
7902 /*
7903 * Reacquire the lock for the caller
7904 */
7905 if (locked) {
7906 ifnet_lock_exclusive(ifp);
7907 }
7908 }
7909
7910 static void
7911 if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
7912 {
7913 struct kev_dl_rrc_state kev;
7914
7915 if (rrc_state == ifp->if_interface_state.rrc_state &&
7916 (ifp->if_interface_state.valid_bitmask &
7917 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
7918 return;
7919 }
7920
7921 ifp->if_interface_state.valid_bitmask |=
7922 IF_INTERFACE_STATE_RRC_STATE_VALID;
7923
7924 ifp->if_interface_state.rrc_state = rrc_state;
7925
7926 /*
7927 * Don't want to hold the lock when issuing kernel events
7928 */
7929 ifnet_lock_done(ifp);
7930
7931 bzero(&kev, sizeof(struct kev_dl_rrc_state));
7932 kev.rrc_state = rrc_state;
7933
7934 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
7935 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
7936
7937 ifnet_lock_exclusive(ifp);
7938 }
7939
7940 errno_t
7941 if_state_update(struct ifnet *ifp,
7942 struct if_interface_state *if_interface_state)
7943 {
7944 u_short if_index_available = 0;
7945
7946 ifnet_lock_exclusive(ifp);
7947
7948 if ((ifp->if_type != IFT_CELLULAR) &&
7949 (if_interface_state->valid_bitmask &
7950 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
7951 ifnet_lock_done(ifp);
7952 return ENOTSUP;
7953 }
7954 if ((if_interface_state->valid_bitmask &
7955 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
7956 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
7957 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
7958 ifnet_lock_done(ifp);
7959 return EINVAL;
7960 }
7961 if ((if_interface_state->valid_bitmask &
7962 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
7963 if_interface_state->rrc_state !=
7964 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
7965 if_interface_state->rrc_state !=
7966 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
7967 ifnet_lock_done(ifp);
7968 return EINVAL;
7969 }
7970
7971 if (if_interface_state->valid_bitmask &
7972 IF_INTERFACE_STATE_LQM_STATE_VALID) {
7973 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
7974 }
7975 if (if_interface_state->valid_bitmask &
7976 IF_INTERFACE_STATE_RRC_STATE_VALID) {
7977 if_rrc_state_update(ifp, if_interface_state->rrc_state);
7978 }
7979 if (if_interface_state->valid_bitmask &
7980 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
7981 ifp->if_interface_state.valid_bitmask |=
7982 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7983 ifp->if_interface_state.interface_availability =
7984 if_interface_state->interface_availability;
7985
7986 if (ifp->if_interface_state.interface_availability ==
7987 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
7988 if_index_available = ifp->if_index;
7989 }
7990 }
7991 ifnet_lock_done(ifp);
7992
7993 /*
7994 * Check if the TCP connections going over this interface should be
7995 * forced to send probe packets instead of waiting for TCP timers
7996 * to fire. This will be done when there is an explicit
7997 * notification that the interface became available.
7998 */
7999 if (if_index_available > 0) {
8000 tcp_interface_send_probe(if_index_available);
8001 }
8002
8003 return 0;
8004 }
8005
8006 void
8007 if_get_state(struct ifnet *ifp,
8008 struct if_interface_state *if_interface_state)
8009 {
8010 ifnet_lock_shared(ifp);
8011
8012 if_interface_state->valid_bitmask = 0;
8013
8014 if (ifp->if_interface_state.valid_bitmask &
8015 IF_INTERFACE_STATE_RRC_STATE_VALID) {
8016 if_interface_state->valid_bitmask |=
8017 IF_INTERFACE_STATE_RRC_STATE_VALID;
8018 if_interface_state->rrc_state =
8019 ifp->if_interface_state.rrc_state;
8020 }
8021 if (ifp->if_interface_state.valid_bitmask &
8022 IF_INTERFACE_STATE_LQM_STATE_VALID) {
8023 if_interface_state->valid_bitmask |=
8024 IF_INTERFACE_STATE_LQM_STATE_VALID;
8025 if_interface_state->lqm_state =
8026 ifp->if_interface_state.lqm_state;
8027 }
8028 if (ifp->if_interface_state.valid_bitmask &
8029 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
8030 if_interface_state->valid_bitmask |=
8031 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8032 if_interface_state->interface_availability =
8033 ifp->if_interface_state.interface_availability;
8034 }
8035
8036 ifnet_lock_done(ifp);
8037 }
8038
8039 errno_t
8040 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
8041 {
8042 ifnet_lock_exclusive(ifp);
8043 if (conn_probe > 1) {
8044 ifnet_lock_done(ifp);
8045 return EINVAL;
8046 }
8047 if (conn_probe == 0) {
8048 ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
8049 } else {
8050 ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
8051 }
8052 ifnet_lock_done(ifp);
8053
8054 #if NECP
8055 necp_update_all_clients();
8056 #endif /* NECP */
8057
8058 tcp_probe_connectivity(ifp, conn_probe);
8059 return 0;
8060 }
8061
8062 /* for uuid.c */
8063 int
8064 uuid_get_ethernet(u_int8_t *node)
8065 {
8066 struct ifnet *ifp;
8067 struct sockaddr_dl *sdl;
8068
8069 ifnet_head_lock_shared();
8070 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
8071 ifnet_lock_shared(ifp);
8072 IFA_LOCK_SPIN(ifp->if_lladdr);
8073 sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
8074 if (sdl->sdl_type == IFT_ETHER) {
8075 memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
8076 IFA_UNLOCK(ifp->if_lladdr);
8077 ifnet_lock_done(ifp);
8078 ifnet_head_done();
8079 return 0;
8080 }
8081 IFA_UNLOCK(ifp->if_lladdr);
8082 ifnet_lock_done(ifp);
8083 }
8084 ifnet_head_done();
8085
8086 return -1;
8087 }
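/*
 * Hypothetical caller sketch: uuid generation wants a 6-byte IEEE 802
 * node identifier, so the buffer passed in must hold at least
 * ETHER_ADDR_LEN bytes.
 *
 *	u_int8_t node[ETHER_ADDR_LEN];
 *
 *	if (uuid_get_ethernet(node) == 0) {
 *		... node[] now holds the MAC of the first Ethernet ifnet ...
 *	} else {
 *		... no Ethernet interface; caller must pick another node id ...
 *	}
 */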
8088
8089 static int
8090 sysctl_rxpoll SYSCTL_HANDLER_ARGS
8091 {
8092 #pragma unused(arg1, arg2)
8093 uint32_t i;
8094 int err;
8095
8096 i = if_rxpoll;
8097
8098 err = sysctl_handle_int(oidp, &i, 0, req);
8099 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8100 return err;
8101 }
8102
8103 if (net_rxpoll == 0) {
8104 return ENXIO;
8105 }
8106
8107 if_rxpoll = i;
8108 return err;
8109 }
8110
8111 static int
8112 sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
8113 {
8114 #pragma unused(arg1, arg2)
8115 uint64_t q;
8116 int err;
8117
8118 q = if_rxpoll_mode_holdtime;
8119
8120 err = sysctl_handle_quad(oidp, &q, 0, req);
8121 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8122 return err;
8123 }
8124
8125 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
8126 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
8127 }
8128
8129 if_rxpoll_mode_holdtime = q;
8130
8131 return err;
8132 }
8133
8134 static int
8135 sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
8136 {
8137 #pragma unused(arg1, arg2)
8138 uint64_t q;
8139 int err;
8140
8141 q = if_rxpoll_sample_holdtime;
8142
8143 err = sysctl_handle_quad(oidp, &q, 0, req);
8144 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8145 return err;
8146 }
8147
8148 if (q < IF_RXPOLL_SAMPLETIME_MIN) {
8149 q = IF_RXPOLL_SAMPLETIME_MIN;
8150 }
8151
8152 if_rxpoll_sample_holdtime = q;
8153
8154 return err;
8155 }
8156
8157 static int
8158 sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
8159 {
8160 #pragma unused(arg1, arg2)
8161 uint64_t q;
8162 int err;
8163
8164 q = if_rxpoll_interval_time;
8165
8166 err = sysctl_handle_quad(oidp, &q, 0, req);
8167 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8168 return err;
8169 }
8170
8171 if (q < IF_RXPOLL_INTERVALTIME_MIN) {
8172 q = IF_RXPOLL_INTERVALTIME_MIN;
8173 }
8174
8175 if_rxpoll_interval_time = q;
8176
8177 return err;
8178 }
8179
8180 static int
8181 sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
8182 {
8183 #pragma unused(arg1, arg2)
8184 uint32_t i;
8185 int err;
8186
8187 i = if_rxpoll_wlowat;
8188
8189 err = sysctl_handle_int(oidp, &i, 0, req);
8190 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8191 return err;
8192 }
8193
8194 if (i == 0 || i >= if_rxpoll_whiwat) {
8195 return EINVAL;
8196 }
8197
8198 if_rxpoll_wlowat = i;
8199 return err;
8200 }
8201
8202 static int
8203 sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
8204 {
8205 #pragma unused(arg1, arg2)
8206 uint32_t i;
8207 int err;
8208
8209 i = if_rxpoll_whiwat;
8210
8211 err = sysctl_handle_int(oidp, &i, 0, req);
8212 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8213 return err;
8214 }
8215
8216 if (i <= if_rxpoll_wlowat) {
8217 return EINVAL;
8218 }
8219
8220 if_rxpoll_whiwat = i;
8221 return err;
8222 }
8223
8224 static int
8225 sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
8226 {
8227 #pragma unused(arg1, arg2)
8228 int i, err;
8229
8230 i = if_sndq_maxlen;
8231
8232 err = sysctl_handle_int(oidp, &i, 0, req);
8233 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8234 return err;
8235 }
8236
8237 if (i < IF_SNDQ_MINLEN) {
8238 i = IF_SNDQ_MINLEN;
8239 }
8240
8241 if_sndq_maxlen = i;
8242 return err;
8243 }
8244
8245 static int
8246 sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
8247 {
8248 #pragma unused(arg1, arg2)
8249 int i, err;
8250
8251 i = if_rcvq_maxlen;
8252
8253 err = sysctl_handle_int(oidp, &i, 0, req);
8254 if (err != 0 || req->newptr == USER_ADDR_NULL) {
8255 return err;
8256 }
8257
8258 if (i < IF_RCVQ_MINLEN) {
8259 i = IF_RCVQ_MINLEN;
8260 }
8261
8262 if_rcvq_maxlen = i;
8263 return err;
8264 }
8265
8266 void
8267 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
8268 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
8269 {
8270 struct kev_dl_node_presence kev;
8271 struct sockaddr_dl *sdl;
8272 struct sockaddr_in6 *sin6;
8273
8274 VERIFY(ifp);
8275 VERIFY(sa);
8276 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
8277
8278 bzero(&kev, sizeof(kev));
8279 sin6 = &kev.sin6_node_address;
8280 sdl = &kev.sdl_node_address;
8281 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
8282 kev.rssi = rssi;
8283 kev.link_quality_metric = lqm;
8284 kev.node_proximity_metric = npm;
8285 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
8286
8287 nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
8288 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
8289 &kev.link_data, sizeof(kev));
8290 }
8291
8292 void
8293 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
8294 {
8295 struct kev_dl_node_absence kev;
8296 struct sockaddr_in6 *sin6;
8297 struct sockaddr_dl *sdl;
8298
8299 VERIFY(ifp);
8300 VERIFY(sa);
8301 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
8302
8303 bzero(&kev, sizeof(kev));
8304 sin6 = &kev.sin6_node_address;
8305 sdl = &kev.sdl_node_address;
8306 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
8307
8308 nd6_alt_node_absent(ifp, sin6);
8309 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
8310 &kev.link_data, sizeof(kev));
8311 }
8312
8313 const void *
8314 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
8315 kauth_cred_t *credp)
8316 {
8317 const u_int8_t *bytes;
8318 size_t size;
8319
8320 bytes = CONST_LLADDR(sdl);
8321 size = sdl->sdl_alen;
8322
8323 #if CONFIG_MACF
8324 if (dlil_lladdr_ckreq) {
8325 switch (sdl->sdl_type) {
8326 case IFT_ETHER:
8327 case IFT_IEEE1394:
8328 break;
8329 default:
8330 credp = NULL;
8331 break;
8332 }
8333 ;
8334
8335 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
8336 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
8337 [0] = 2
8338 };
8339
8340 bytes = unspec;
8341 }
8342 }
8343 #else
8344 #pragma unused(credp)
8345 #endif
8346
8347 if (sizep != NULL) {
8348 *sizep = size;
8349 }
8350 return bytes;
8351 }
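/*
 * Example of the substitution above: with dlil_lladdr_ckreq enabled, a
 * caller whose credentials lack the "net.link.addr" privilege gets back
 * a FIREWIRE_EUI64_LEN-sized buffer of zeros whose first byte is 2 (the
 * locally-administered bit) in place of the real Ethernet or FireWire
 * address; *sizep still reports the original sdl_alen.
 */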
8352
8353 void
8354 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
8355 u_int8_t info[DLIL_MODARGLEN])
8356 {
8357 struct kev_dl_issues kev;
8358 struct timeval tv;
8359
8360 VERIFY(ifp != NULL);
8361 VERIFY(modid != NULL);
8362 _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
8363 _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
8364
8365 bzero(&kev, sizeof(kev));
8366
8367 microtime(&tv);
8368 kev.timestamp = tv.tv_sec;
8369 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
8370 if (info != NULL) {
8371 bcopy(info, &kev.info, DLIL_MODARGLEN);
8372 }
8373
8374 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
8375 &kev.link_data, sizeof(kev));
8376 }
8377
8378 errno_t
8379 ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
8380 struct proc *p)
8381 {
8382 u_int32_t level = IFNET_THROTTLE_OFF;
8383 errno_t result = 0;
8384
8385 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
8386
8387 if (cmd == SIOCSIFOPPORTUNISTIC) {
8388 /*
8389 * XXX: Use priv_check_cred() instead of root check?
8390 */
8391 if ((result = proc_suser(p)) != 0) {
8392 return result;
8393 }
8394
8395 if (ifr->ifr_opportunistic.ifo_flags ==
8396 IFRIFOF_BLOCK_OPPORTUNISTIC) {
8397 level = IFNET_THROTTLE_OPPORTUNISTIC;
8398 } else if (ifr->ifr_opportunistic.ifo_flags == 0) {
8399 level = IFNET_THROTTLE_OFF;
8400 } else {
8401 result = EINVAL;
8402 }
8403
8404 if (result == 0) {
8405 result = ifnet_set_throttle(ifp, level);
8406 }
8407 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
8408 ifr->ifr_opportunistic.ifo_flags = 0;
8409 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
8410 ifr->ifr_opportunistic.ifo_flags |=
8411 IFRIFOF_BLOCK_OPPORTUNISTIC;
8412 }
8413 }
8414
8415 /*
8416 * Return the count of current opportunistic connections
8417 * over the interface.
8418 */
8419 if (result == 0) {
8420 uint32_t flags = 0;
8421 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
8422 INPCB_OPPORTUNISTIC_SETCMD : 0;
8423 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
8424 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
8425 ifr->ifr_opportunistic.ifo_inuse =
8426 udp_count_opportunistic(ifp->if_index, flags) +
8427 tcp_count_opportunistic(ifp->if_index, flags);
8428 }
8429
8430 if (result == EALREADY) {
8431 result = 0;
8432 }
8433
8434 return result;
8435 }
8436
8437 int
8438 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
8439 {
8440 struct ifclassq *ifq;
8441 int err = 0;
8442
8443 if (!(ifp->if_eflags & IFEF_TXSTART)) {
8444 return ENXIO;
8445 }
8446
8447 *level = IFNET_THROTTLE_OFF;
8448
8449 ifq = &ifp->if_snd;
8450 IFCQ_LOCK(ifq);
8451 /* Throttling works only for IFCQ, not ALTQ instances */
8452 if (IFCQ_IS_ENABLED(ifq)) {
8453 IFCQ_GET_THROTTLE(ifq, *level, err);
8454 }
8455 IFCQ_UNLOCK(ifq);
8456
8457 return err;
8458 }
8459
8460 int
8461 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
8462 {
8463 struct ifclassq *ifq;
8464 int err = 0;
8465
8466 if (!(ifp->if_eflags & IFEF_TXSTART)) {
8467 return ENXIO;
8468 }
8469
8470 ifq = &ifp->if_snd;
8471
8472 switch (level) {
8473 case IFNET_THROTTLE_OFF:
8474 case IFNET_THROTTLE_OPPORTUNISTIC:
8475 break;
8476 default:
8477 return EINVAL;
8478 }
8479
8480 IFCQ_LOCK(ifq);
8481 if (IFCQ_IS_ENABLED(ifq)) {
8482 IFCQ_SET_THROTTLE(ifq, level, err);
8483 }
8484 IFCQ_UNLOCK(ifq);
8485
8486 if (err == 0) {
8487 printf("%s: throttling level set to %d\n", if_name(ifp),
8488 level);
8489 if (level == IFNET_THROTTLE_OFF) {
8490 ifnet_start(ifp);
8491 }
8492 }
8493
8494 return err;
8495 }
8496
8497 errno_t
8498 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
8499 struct proc *p)
8500 {
8501 #pragma unused(p)
8502 errno_t result = 0;
8503 uint32_t flags;
8504 int level, category, subcategory;
8505
8506 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
8507
8508 if (cmd == SIOCSIFLOG) {
8509 if ((result = priv_check_cred(kauth_cred_get(),
8510 PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
8511 return result;
8512 }
8513
8514 level = ifr->ifr_log.ifl_level;
8515 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
8516 result = EINVAL;
8517 }
8518
8519 flags = ifr->ifr_log.ifl_flags;
8520 if ((flags &= IFNET_LOGF_MASK) == 0) {
8521 result = EINVAL;
8522 }
8523
8524 category = ifr->ifr_log.ifl_category;
8525 subcategory = ifr->ifr_log.ifl_subcategory;
8526
8527 if (result == 0) {
8528 result = ifnet_set_log(ifp, level, flags,
8529 category, subcategory);
8530 }
8531 } else {
8532 result = ifnet_get_log(ifp, &level, &flags, &category,
8533 &subcategory);
8534 if (result == 0) {
8535 ifr->ifr_log.ifl_level = level;
8536 ifr->ifr_log.ifl_flags = flags;
8537 ifr->ifr_log.ifl_category = category;
8538 ifr->ifr_log.ifl_subcategory = subcategory;
8539 }
8540 }
8541
8542 return result;
8543 }
8544
8545 int
8546 ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
8547 int32_t category, int32_t subcategory)
8548 {
8549 int err = 0;
8550
8551 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
8552 VERIFY(flags & IFNET_LOGF_MASK);
8553
8554 /*
8555 * The logging level applies to all facilities; make sure to
8556 * update them all with the most current level.
8557 */
8558 flags |= ifp->if_log.flags;
8559
8560 if (ifp->if_output_ctl != NULL) {
8561 struct ifnet_log_params l;
8562
8563 bzero(&l, sizeof(l));
8564 l.level = level;
8565 l.flags = flags;
8566 l.flags &= ~IFNET_LOGF_DLIL;
8567 l.category = category;
8568 l.subcategory = subcategory;
8569
8570 /* Send this request to lower layers */
8571 if (l.flags != 0) {
8572 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
8573 sizeof(l), &l);
8574 }
8575 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
8576 /*
8577 * If targeted to the lower layers without an output
8578 * control callback registered on the interface, just
8579 * silently ignore facilities other than ours.
8580 */
8581 flags &= IFNET_LOGF_DLIL;
8582 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
8583 level = 0;
8584 }
8585 }
8586
8587 if (err == 0) {
8588 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
8589 ifp->if_log.flags = 0;
8590 } else {
8591 ifp->if_log.flags |= flags;
8592 }
8593
8594 log(LOG_INFO, "%s: logging level set to %d flags=%b "
8595 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
8596 ifp->if_log.level, ifp->if_log.flags,
8597 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
8598 category, subcategory);
8599 }
8600
8601 return err;
8602 }
8603
8604 int
8605 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
8606 int32_t *category, int32_t *subcategory)
8607 {
8608 if (level != NULL) {
8609 *level = ifp->if_log.level;
8610 }
8611 if (flags != NULL) {
8612 *flags = ifp->if_log.flags;
8613 }
8614 if (category != NULL) {
8615 *category = ifp->if_log.category;
8616 }
8617 if (subcategory != NULL) {
8618 *subcategory = ifp->if_log.subcategory;
8619 }
8620
8621 return 0;
8622 }
8623
8624 int
8625 ifnet_notify_address(struct ifnet *ifp, int af)
8626 {
8627 struct ifnet_notify_address_params na;
8628
8629 #if PF
8630 (void) pf_ifaddr_hook(ifp);
8631 #endif /* PF */
8632
8633 if (ifp->if_output_ctl == NULL) {
8634 return EOPNOTSUPP;
8635 }
8636
8637 bzero(&na, sizeof(na));
8638 na.address_family = af;
8639
8640 return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
8641 sizeof(na), &na);
8642 }
8643
8644 errno_t
8645 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
8646 {
8647 if (ifp == NULL || flowid == NULL) {
8648 return EINVAL;
8649 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
8650 !IF_FULLY_ATTACHED(ifp)) {
8651 return ENXIO;
8652 }
8653
8654 *flowid = ifp->if_flowhash;
8655
8656 return 0;
8657 }
8658
8659 errno_t
8660 ifnet_disable_output(struct ifnet *ifp)
8661 {
8662 int err;
8663
8664 if (ifp == NULL) {
8665 return EINVAL;
8666 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
8667 !IF_FULLY_ATTACHED(ifp)) {
8668 return ENXIO;
8669 }
8670
8671 if ((err = ifnet_fc_add(ifp)) == 0) {
8672 lck_mtx_lock_spin(&ifp->if_start_lock);
8673 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
8674 lck_mtx_unlock(&ifp->if_start_lock);
8675 }
8676 return err;
8677 }
8678
8679 errno_t
8680 ifnet_enable_output(struct ifnet *ifp)
8681 {
8682 if (ifp == NULL) {
8683 return EINVAL;
8684 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
8685 !IF_FULLY_ATTACHED(ifp)) {
8686 return ENXIO;
8687 }
8688
8689 ifnet_start_common(ifp, TRUE);
8690 return 0;
8691 }
8692
8693 void
8694 ifnet_flowadv(uint32_t flowhash)
8695 {
8696 struct ifnet_fc_entry *ifce;
8697 struct ifnet *ifp;
8698
8699 ifce = ifnet_fc_get(flowhash);
8700 if (ifce == NULL) {
8701 return;
8702 }
8703
8704 VERIFY(ifce->ifce_ifp != NULL);
8705 ifp = ifce->ifce_ifp;
8706
8707 /* flow hash gets recalculated per attach, so check */
8708 if (ifnet_is_attached(ifp, 1)) {
8709 if (ifp->if_flowhash == flowhash) {
8710 (void) ifnet_enable_output(ifp);
8711 }
8712 ifnet_decr_iorefcnt(ifp);
8713 }
8714 ifnet_fc_entry_free(ifce);
8715 }
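/*
 * Illustrative sketch of the flow-control round trip implemented by the
 * helpers below (not a verbatim caller from this file):
 *
 *	// transmit side decides the interface can't take more packets
 *	ifnet_disable_output(ifp);	// inserts an ifnet_fc_entry keyed by
 *					// ifp->if_flowhash and sets
 *					// IFSF_FLOW_CONTROLLED
 *	...
 *	// a flow advisory later arrives carrying the same hash
 *	ifnet_flowadv(flowhash);	// looks up and removes the entry,
 *					// then calls ifnet_enable_output()
 *
 * Because the flow hash is recomputed on every attach, a stale hash
 * simply fails the lookup/compare and is ignored.
 */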
8716
8717 /*
8718 * Function to compare ifnet_fc_entries in ifnet flow control tree
8719 */
8720 static inline int
8721 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
8722 {
8723 return fc1->ifce_flowhash - fc2->ifce_flowhash;
8724 }
8725
8726 static int
8727 ifnet_fc_add(struct ifnet *ifp)
8728 {
8729 struct ifnet_fc_entry keyfc, *ifce;
8730 uint32_t flowhash;
8731
8732 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
8733 VERIFY(ifp->if_flowhash != 0);
8734 flowhash = ifp->if_flowhash;
8735
8736 bzero(&keyfc, sizeof(keyfc));
8737 keyfc.ifce_flowhash = flowhash;
8738
8739 lck_mtx_lock_spin(&ifnet_fc_lock);
8740 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
8741 if (ifce != NULL && ifce->ifce_ifp == ifp) {
8742 /* Entry is already in ifnet_fc_tree, return */
8743 lck_mtx_unlock(&ifnet_fc_lock);
8744 return 0;
8745 }
8746
8747 if (ifce != NULL) {
8748 /*
8749 * There is a different fc entry with the same flow hash
8750 * but different ifp pointer. There can be a collision
8751 * on flow hash but the probability is low. Let's just
8752 * avoid adding a second one when there is a collision.
8753 */
8754 lck_mtx_unlock(&ifnet_fc_lock);
8755 return EAGAIN;
8756 }
8757
8758 /* become regular mutex */
8759 lck_mtx_convert_spin(&ifnet_fc_lock);
8760
8761 ifce = zalloc(ifnet_fc_zone);
8762 if (ifce == NULL) {
8763 /* memory allocation failed */
8764 lck_mtx_unlock(&ifnet_fc_lock);
8765 return ENOMEM;
8766 }
8767 bzero(ifce, ifnet_fc_zone_size);
8768
8769 ifce->ifce_flowhash = flowhash;
8770 ifce->ifce_ifp = ifp;
8771
8772 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
8773 lck_mtx_unlock(&ifnet_fc_lock);
8774 return 0;
8775 }
8776
8777 static struct ifnet_fc_entry *
8778 ifnet_fc_get(uint32_t flowhash)
8779 {
8780 struct ifnet_fc_entry keyfc, *ifce;
8781 struct ifnet *ifp;
8782
8783 bzero(&keyfc, sizeof(keyfc));
8784 keyfc.ifce_flowhash = flowhash;
8785
8786 lck_mtx_lock_spin(&ifnet_fc_lock);
8787 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
8788 if (ifce == NULL) {
8789 /* Entry is not present in ifnet_fc_tree, return */
8790 lck_mtx_unlock(&ifnet_fc_lock);
8791 return NULL;
8792 }
8793
8794 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
8795
8796 VERIFY(ifce->ifce_ifp != NULL);
8797 ifp = ifce->ifce_ifp;
8798
8799 /* become regular mutex */
8800 lck_mtx_convert_spin(&ifnet_fc_lock);
8801
8802 if (!ifnet_is_attached(ifp, 0)) {
8803 /*
8804 * This ifp is not attached or in the process of being
8805 * detached; just don't process it.
8806 */
8807 ifnet_fc_entry_free(ifce);
8808 ifce = NULL;
8809 }
8810 lck_mtx_unlock(&ifnet_fc_lock);
8811
8812 return ifce;
8813 }
8814
8815 static void
8816 ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
8817 {
8818 zfree(ifnet_fc_zone, ifce);
8819 }
8820
8821 static uint32_t
8822 ifnet_calc_flowhash(struct ifnet *ifp)
8823 {
8824 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
8825 uint32_t flowhash = 0;
8826
8827 if (ifnet_flowhash_seed == 0) {
8828 ifnet_flowhash_seed = RandomULong();
8829 }
8830
8831 bzero(&fh, sizeof(fh));
8832
8833 (void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
8834 fh.ifk_unit = ifp->if_unit;
8835 fh.ifk_flags = ifp->if_flags;
8836 fh.ifk_eflags = ifp->if_eflags;
8837 fh.ifk_capabilities = ifp->if_capabilities;
8838 fh.ifk_capenable = ifp->if_capenable;
8839 fh.ifk_output_sched_model = ifp->if_output_sched_model;
8840 fh.ifk_rand1 = RandomULong();
8841 fh.ifk_rand2 = RandomULong();
8842
8843 try_again:
8844 flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
8845 if (flowhash == 0) {
8846 /* try to get a non-zero flowhash */
8847 ifnet_flowhash_seed = RandomULong();
8848 goto try_again;
8849 }
8850
8851 return flowhash;
8852 }
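/*
 * The retry loop above guarantees a non-zero result: zero is treated as
 * "no flow hash" elsewhere (e.g. ifnet_fc_add() VERIFYs that
 * ifp->if_flowhash != 0), so on the off chance net_flowhash() yields 0,
 * the seed is re-rolled and the hash recomputed.
 */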
8853
8854 int
8855 ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
8856 uint16_t flags, uint8_t *data)
8857 {
8858 #pragma unused(flags)
8859 int error = 0;
8860
8861 switch (family) {
8862 case AF_INET:
8863 if_inetdata_lock_exclusive(ifp);
8864 if (IN_IFEXTRA(ifp) != NULL) {
8865 if (len == 0) {
8866 /* Allow clearing the signature */
8867 IN_IFEXTRA(ifp)->netsig_len = 0;
8868 bzero(IN_IFEXTRA(ifp)->netsig,
8869 sizeof(IN_IFEXTRA(ifp)->netsig));
8870 if_inetdata_lock_done(ifp);
8871 break;
8872 } else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
8873 error = EINVAL;
8874 if_inetdata_lock_done(ifp);
8875 break;
8876 }
8877 IN_IFEXTRA(ifp)->netsig_len = len;
8878 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
8879 } else {
8880 error = ENOMEM;
8881 }
8882 if_inetdata_lock_done(ifp);
8883 break;
8884
8885 case AF_INET6:
8886 if_inet6data_lock_exclusive(ifp);
8887 if (IN6_IFEXTRA(ifp) != NULL) {
8888 if (len == 0) {
8889 /* Allow clearing the signature */
8890 IN6_IFEXTRA(ifp)->netsig_len = 0;
8891 bzero(IN6_IFEXTRA(ifp)->netsig,
8892 sizeof(IN6_IFEXTRA(ifp)->netsig));
8893 if_inet6data_lock_done(ifp);
8894 break;
8895 } else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
8896 error = EINVAL;
8897 if_inet6data_lock_done(ifp);
8898 break;
8899 }
8900 IN6_IFEXTRA(ifp)->netsig_len = len;
8901 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
8902 } else {
8903 error = ENOMEM;
8904 }
8905 if_inet6data_lock_done(ifp);
8906 break;
8907
8908 default:
8909 error = EINVAL;
8910 break;
8911 }
8912
8913 return error;
8914 }
8915
8916 int
8917 ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
8918 uint16_t *flags, uint8_t *data)
8919 {
8920 int error = 0;
8921
8922 if (ifp == NULL || len == NULL || data == NULL) {
8923 return EINVAL;
8924 }
8925
8926 switch (family) {
8927 case AF_INET:
8928 if_inetdata_lock_shared(ifp);
8929 if (IN_IFEXTRA(ifp) != NULL) {
8930 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
8931 error = EINVAL;
8932 if_inetdata_lock_done(ifp);
8933 break;
8934 }
8935 if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0) {
8936 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
8937 } else {
8938 error = ENOENT;
8939 }
8940 } else {
8941 error = ENOMEM;
8942 }
8943 if_inetdata_lock_done(ifp);
8944 break;
8945
8946 case AF_INET6:
8947 if_inet6data_lock_shared(ifp);
8948 if (IN6_IFEXTRA(ifp) != NULL) {
8949 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
8950 error = EINVAL;
8951 if_inet6data_lock_done(ifp);
8952 break;
8953 }
8954 if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0) {
8955 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
8956 } else {
8957 error = ENOENT;
8958 }
8959 } else {
8960 error = ENOMEM;
8961 }
8962 if_inet6data_lock_done(ifp);
8963 break;
8964
8965 default:
8966 error = EINVAL;
8967 break;
8968 }
8969
8970 if (error == 0 && flags != NULL) {
8971 *flags = 0;
8972 }
8973
8974 return error;
8975 }
8976
8977 #if INET6
8978 int
8979 ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
8980 {
8981 int i, error = 0, one_set = 0;
8982
8983 if_inet6data_lock_exclusive(ifp);
8984
8985 if (IN6_IFEXTRA(ifp) == NULL) {
8986 error = ENOMEM;
8987 goto out;
8988 }
8989
8990 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
8991 uint32_t prefix_len =
8992 prefixes[i].prefix_len;
8993 struct in6_addr *prefix =
8994 &prefixes[i].ipv6_prefix;
8995
8996 if (prefix_len == 0) {
8997 clat_log0((LOG_DEBUG,
8998 "NAT64 prefixes purged from Interface %s\n",
8999 if_name(ifp)));
9000 /* Allow clearing the prefix */
9001 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
9002 bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
9003 sizeof(struct in6_addr));
9004
9005 continue;
9006 } else if (prefix_len != NAT64_PREFIX_LEN_32 &&
9007 prefix_len != NAT64_PREFIX_LEN_40 &&
9008 prefix_len != NAT64_PREFIX_LEN_48 &&
9009 prefix_len != NAT64_PREFIX_LEN_56 &&
9010 prefix_len != NAT64_PREFIX_LEN_64 &&
9011 prefix_len != NAT64_PREFIX_LEN_96) {
9012 clat_log0((LOG_DEBUG,
9013 "NAT64 prefixlen is incorrect %d\n", prefix_len));
9014 error = EINVAL;
9015 goto out;
9016 }
9017
9018 if (IN6_IS_SCOPE_EMBED(prefix)) {
9019 clat_log0((LOG_DEBUG,
9020 "NAT64 prefix has interface/link local scope.\n"));
9021 error = EINVAL;
9022 goto out;
9023 }
9024
9025 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
9026 bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
9027 sizeof(struct in6_addr));
9028 clat_log0((LOG_DEBUG,
9029 "NAT64 prefix set to %s with prefixlen: %d\n",
9030 ip6_sprintf(prefix), prefix_len));
9031 one_set = 1;
9032 }
9033
9034 out:
9035 if_inet6data_lock_done(ifp);
9036
9037 if (error == 0 && one_set != 0) {
9038 necp_update_all_clients();
9039 }
9040
9041 return error;
9042 }
9043
9044 int
9045 ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
9046 {
9047 int i, found_one = 0, error = 0;
9048
9049 if (ifp == NULL) {
9050 return EINVAL;
9051 }
9052
9053 if_inet6data_lock_shared(ifp);
9054
9055 if (IN6_IFEXTRA(ifp) == NULL) {
9056 error = ENOMEM;
9057 goto out;
9058 }
9059
9060 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
9061 if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
9062 found_one = 1;
9063 }
9064 }
9065
9066 if (found_one == 0) {
9067 error = ENOENT;
9068 goto out;
9069 }
9070
9071 if (prefixes) {
9072 bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
9073 sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
9074 }
9075
9076 out:
9077 if_inet6data_lock_done(ifp);
9078
9079 return error;
9080 }
9081 #endif
9082
9083 static void
9084 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
9085 protocol_family_t pf)
9086 {
9087 #pragma unused(ifp)
9088 uint32_t did_sw;
9089
9090 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
9091 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
9092 return;
9093 }
9094
9095 switch (pf) {
9096 case PF_INET:
9097 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
9098 if (did_sw & CSUM_DELAY_IP) {
9099 hwcksum_dbg_finalized_hdr++;
9100 }
9101 if (did_sw & CSUM_DELAY_DATA) {
9102 hwcksum_dbg_finalized_data++;
9103 }
9104 break;
9105 #if INET6
9106 case PF_INET6:
9107 /*
9108 * Checksum offload should not have been enabled when
9109 * extension headers exist; that also means that we
9110 * cannot force-finalize packets with extension headers.
9111 * Indicate to the callee that it should skip such cases
9112 * by setting optlen to -1.
9113 */
9114 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
9115 m->m_pkthdr.csum_flags);
9116 if (did_sw & CSUM_DELAY_IPV6_DATA) {
9117 hwcksum_dbg_finalized_data++;
9118 }
9119 break;
9120 #endif /* INET6 */
9121 default:
9122 return;
9123 }
9124 }
9125
9126 static void
9127 dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
9128 protocol_family_t pf)
9129 {
9130 uint16_t sum = 0;
9131 uint32_t hlen;
9132
9133 if (frame_header == NULL ||
9134 frame_header < (char *)mbuf_datastart(m) ||
9135 frame_header > (char *)m->m_data) {
9136 printf("%s: frame header pointer 0x%llx out of range "
9137 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
9138 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
9139 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
9140 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
9141 (uint64_t)VM_KERNEL_ADDRPERM(m));
9142 return;
9143 }
9144 hlen = (m->m_data - frame_header);
9145
9146 switch (pf) {
9147 case PF_INET:
9148 #if INET6
9149 case PF_INET6:
9150 #endif /* INET6 */
9151 break;
9152 default:
9153 return;
9154 }
9155
9156 /*
9157 * Force partial checksum offload; useful to simulate cases
9158 * where the hardware does not support partial checksum offload,
9159 * in order to validate correctness throughout the layers above.
9160 */
9161 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
9162 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
9163
9164 if (foff > (uint32_t)m->m_pkthdr.len) {
9165 return;
9166 }
9167
9168 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
9169
9170 /* Compute 16-bit 1's complement sum from forced offset */
9171 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
9172
9173 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
9174 m->m_pkthdr.csum_rx_val = sum;
9175 m->m_pkthdr.csum_rx_start = (foff + hlen);
9176
9177 hwcksum_dbg_partial_forced++;
9178 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
9179 }
9180
9181 /*
9182 * Partial checksum offload verification (and adjustment);
9183 * useful to validate and test cases where the hardware
9184 * supports partial checksum offload.
9185 */
9186 if ((m->m_pkthdr.csum_flags &
9187 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
9188 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
9189 uint32_t rxoff;
9190
9191 /* Start offset must begin after frame header */
9192 rxoff = m->m_pkthdr.csum_rx_start;
9193 if (hlen > rxoff) {
9194 hwcksum_dbg_bad_rxoff++;
9195 if (dlil_verbose) {
9196 printf("%s: partial cksum start offset %d "
9197 "is less than frame header length %d for "
9198 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
9199 (uint64_t)VM_KERNEL_ADDRPERM(m));
9200 }
9201 return;
9202 }
9203 rxoff -= hlen;
9204
9205 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
9206 /*
9207 * Compute the expected 16-bit 1's complement sum;
9208 * skip this if we've already computed it above
9209 * when partial checksum offload is forced.
9210 */
9211 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
9212
9213 /* Hardware or driver is buggy */
9214 if (sum != m->m_pkthdr.csum_rx_val) {
9215 hwcksum_dbg_bad_cksum++;
9216 if (dlil_verbose) {
9217 printf("%s: bad partial cksum value "
9218 "0x%x (expected 0x%x) for mbuf "
9219 "0x%llx [rx_start %d]\n",
9220 if_name(ifp),
9221 m->m_pkthdr.csum_rx_val, sum,
9222 (uint64_t)VM_KERNEL_ADDRPERM(m),
9223 m->m_pkthdr.csum_rx_start);
9224 }
9225 return;
9226 }
9227 }
9228 hwcksum_dbg_verified++;
9229
9230 /*
9231 * This code allows us to emulate various hardware that
9232 * performs the 16-bit 1's complement sum beginning at
9233 * various start offset values.
9234 */
9235 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
9236 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
9237
9238 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
9239 return;
9240 }
9241
9242 sum = m_adj_sum16(m, rxoff, aoff,
9243 m_pktlen(m) - aoff, sum);
9244
9245 m->m_pkthdr.csum_rx_val = sum;
9246 m->m_pkthdr.csum_rx_start = (aoff + hlen);
9247
9248 hwcksum_dbg_adjusted++;
9249 }
9250 }
9251 }
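/*
 * Sketch of the arithmetic behind the HWCKSUM_DBG_PARTIAL_RXOFF_ADJ case
 * above (assuming the usual 16-bit 1's complement identities): if the
 * stored csum_rx_val is the sum of the bytes starting at rxoff and we
 * want it to look as if the hardware had started summing at aoff instead,
 * m_adj_sum16() folds out (or in) the contribution of the bytes between
 * the two offsets, i.e. roughly
 *
 *	new_sum = old_sum -' sum16(bytes in [rxoff, aoff))   (for aoff > rxoff)
 *
 * where -' denotes 1's complement subtraction; csum_rx_start is then
 * rewritten to match the new starting point.
 */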
9252
9253 static int
9254 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
9255 {
9256 #pragma unused(arg1, arg2)
9257 u_int32_t i;
9258 int err;
9259
9260 i = hwcksum_dbg_mode;
9261
9262 err = sysctl_handle_int(oidp, &i, 0, req);
9263 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9264 return err;
9265 }
9266
9267 if (hwcksum_dbg == 0) {
9268 return ENODEV;
9269 }
9270
9271 if ((i & ~HWCKSUM_DBG_MASK) != 0) {
9272 return EINVAL;
9273 }
9274
9275 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
9276
9277 return err;
9278 }
9279
9280 static int
9281 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
9282 {
9283 #pragma unused(arg1, arg2)
9284 u_int32_t i;
9285 int err;
9286
9287 i = hwcksum_dbg_partial_rxoff_forced;
9288
9289 err = sysctl_handle_int(oidp, &i, 0, req);
9290 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9291 return err;
9292 }
9293
9294 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
9295 return ENODEV;
9296 }
9297
9298 hwcksum_dbg_partial_rxoff_forced = i;
9299
9300 return err;
9301 }
9302
9303 static int
9304 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
9305 {
9306 #pragma unused(arg1, arg2)
9307 u_int32_t i;
9308 int err;
9309
9310 i = hwcksum_dbg_partial_rxoff_adj;
9311
9312 err = sysctl_handle_int(oidp, &i, 0, req);
9313 if (err != 0 || req->newptr == USER_ADDR_NULL) {
9314 return err;
9315 }
9316
9317 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) {
9318 return ENODEV;
9319 }
9320
9321 hwcksum_dbg_partial_rxoff_adj = i;
9322
9323 return err;
9324 }
9325
9326 static int
9327 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
9328 {
9329 #pragma unused(oidp, arg1, arg2)
9330 int err;
9331
9332 if (req->oldptr == USER_ADDR_NULL) {
9333 }
9334 if (req->newptr != USER_ADDR_NULL) {
9335 return EPERM;
9336 }
9337 err = SYSCTL_OUT(req, &tx_chain_len_stats,
9338 sizeof(struct chain_len_stats));
9339
9340 return err;
9341 }
9342
9343
9344 #if DEBUG || DEVELOPMENT
9345 /* Blob for sum16 verification */
9346 static uint8_t sumdata[] = {
9347 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
9348 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
9349 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
9350 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
9351 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
9352 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
9353 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
9354 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
9355 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
9356 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
9357 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
9358 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
9359 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
9360 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
9361 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
9362 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
9363 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
9364 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
9365 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
9366 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
9367 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
9368 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
9369 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
9370 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
9371 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
9372 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
9373 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
9374 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
9375 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
9376 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
9377 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
9378 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
9379 0xc8, 0x28, 0x02, 0x00, 0x00
9380 };
9381
9382 /* Precomputed 16-bit 1's complement sums for various spans of the above data */
9383 static struct {
9384 boolean_t init;
9385 uint16_t len;
9386 uint16_t sumr; /* reference */
9387 uint16_t sumrp; /* reference, precomputed */
9388 } sumtbl[] = {
9389 { FALSE, 0, 0, 0x0000 },
9390 { FALSE, 1, 0, 0x001f },
9391 { FALSE, 2, 0, 0x8b1f },
9392 { FALSE, 3, 0, 0x8b27 },
9393 { FALSE, 7, 0, 0x790e },
9394 { FALSE, 11, 0, 0xcb6d },
9395 { FALSE, 20, 0, 0x20dd },
9396 { FALSE, 27, 0, 0xbabd },
9397 { FALSE, 32, 0, 0xf3e8 },
9398 { FALSE, 37, 0, 0x197d },
9399 { FALSE, 43, 0, 0x9eae },
9400 { FALSE, 64, 0, 0x4678 },
9401 { FALSE, 127, 0, 0x9399 },
9402 { FALSE, 256, 0, 0xd147 },
9403 { FALSE, 325, 0, 0x0358 },
9404 };
9405 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
9406
9407 static void
9408 dlil_verify_sum16(void)
9409 {
9410 struct mbuf *m;
9411 uint8_t *buf;
9412 int n;
9413
9414 /* Make sure test data plus extra room for alignment fits in cluster */
9415 _CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);
9416
9417 kprintf("DLIL: running SUM16 self-tests ... ");
9418
9419 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
9420 m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));
9421
9422 buf = mtod(m, uint8_t *); /* base address */
9423
9424 for (n = 0; n < SUMTBL_MAX; n++) {
9425 uint16_t len = sumtbl[n].len;
9426 int i;
9427
9428 /* Verify for all possible alignments */
9429 for (i = 0; i < (int)sizeof(uint64_t); i++) {
9430 uint16_t sum, sumr;
9431 uint8_t *c;
9432
9433 /* Copy over test data to mbuf */
9434 VERIFY(len <= sizeof(sumdata));
9435 c = buf + i;
9436 bcopy(sumdata, c, len);
9437
9438 /* Zero-offset test (align by data pointer) */
9439 m->m_data = (caddr_t)c;
9440 m->m_len = len;
9441 sum = m_sum16(m, 0, len);
9442
9443 if (!sumtbl[n].init) {
9444 sumr = in_cksum_mbuf_ref(m, len, 0, 0);
9445 sumtbl[n].sumr = sumr;
9446 sumtbl[n].init = TRUE;
9447 } else {
9448 sumr = sumtbl[n].sumr;
9449 }
9450
9451 /* Something is horribly broken; stop now */
9452 if (sumr != sumtbl[n].sumrp) {
9453 panic_plain("\n%s: broken in_cksum_mbuf_ref() "
9454 "for len=%d align=%d sum=0x%04x "
9455 "[expected=0x%04x]\n", __func__,
9456 len, i, sum, sumr);
9457 /* NOTREACHED */
9458 } else if (sum != sumr) {
9459 panic_plain("\n%s: broken m_sum16() for len=%d "
9460 "align=%d sum=0x%04x [expected=0x%04x]\n",
9461 __func__, len, i, sum, sumr);
9462 /* NOTREACHED */
9463 }
9464
9465 /* Alignment test by offset (fixed data pointer) */
9466 m->m_data = (caddr_t)buf;
9467 m->m_len = i + len;
9468 sum = m_sum16(m, i, len);
9469
9470 /* Something is horribly broken; stop now */
9471 if (sum != sumr) {
9472 panic_plain("\n%s: broken m_sum16() for len=%d "
9473 "offset=%d sum=0x%04x [expected=0x%04x]\n",
9474 __func__, len, i, sum, sumr);
9475 /* NOTREACHED */
9476 }
9477 #if INET
9478 /* Simple sum16 contiguous buffer test by alignment */
9479 sum = b_sum16(c, len);
9480
9481 /* Something is horribly broken; stop now */
9482 if (sum != sumr) {
9483 panic_plain("\n%s: broken b_sum16() for len=%d "
9484 "align=%d sum=0x%04x [expected=0x%04x]\n",
9485 __func__, len, i, sum, sumr);
9486 /* NOTREACHED */
9487 }
9488 #endif /* INET */
9489 }
9490 }
9491 m_freem(m);
9492
9493 kprintf("PASSED\n");
9494 }
9495 #endif /* DEBUG || DEVELOPMENT */
9496
9497 #define CASE_STRINGIFY(x) case x: return #x
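/*
 * For example, CASE_STRINGIFY(KEV_DL_LINK_ON) expands to:
 *
 *	case KEV_DL_LINK_ON: return "KEV_DL_LINK_ON";
 */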
9498
9499 __private_extern__ const char *
9500 dlil_kev_dl_code_str(u_int32_t event_code)
9501 {
9502 switch (event_code) {
9503 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
9504 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
9505 CASE_STRINGIFY(KEV_DL_SIFMTU);
9506 CASE_STRINGIFY(KEV_DL_SIFPHYS);
9507 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
9508 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
9509 CASE_STRINGIFY(KEV_DL_ADDMULTI);
9510 CASE_STRINGIFY(KEV_DL_DELMULTI);
9511 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
9512 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
9513 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
9514 CASE_STRINGIFY(KEV_DL_LINK_OFF);
9515 CASE_STRINGIFY(KEV_DL_LINK_ON);
9516 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
9517 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
9518 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
9519 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
9520 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
9521 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
9522 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
9523 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
9524 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
9525 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
9526 CASE_STRINGIFY(KEV_DL_ISSUES);
9527 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
9528 default:
9529 break;
9530 }
9531 return "";
9532 }
9533
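/*
 * Data-threshold thread call, armed by ifnet_notify_data_threshold()
 * below: if the interface is still attached, tell the network statistics
 * subsystem that the interface has crossed its data threshold.
 */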
9534 static void
9535 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
9536 {
9537 #pragma unused(arg1)
9538 struct ifnet *ifp = arg0;
9539
9540 if (ifnet_is_attached(ifp, 1)) {
9541 nstat_ifnet_threshold_reached(ifp->if_index);
9542 ifnet_decr_iorefcnt(ifp);
9543 }
9544 }
9545
9546 void
9547 ifnet_notify_data_threshold(struct ifnet *ifp)
9548 {
9549 uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
9550 uint64_t oldbytes = ifp->if_dt_bytes;
9551
9552 ASSERT(ifp->if_dt_tcall != NULL);
9553
9554 /*
9555 * If we went over the threshold, notify NetworkStatistics.  The atomic CAS
9556 * and the delayed thread call rate-limit this to once per threshold interval.
9557 */
9558 if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
9559 OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
9560 !thread_call_isactive(ifp->if_dt_tcall)) {
9561 uint64_t tival = (threshold_interval * NSEC_PER_SEC);
9562 uint64_t now = mach_absolute_time(), deadline = now;
9563 uint64_t ival;
9564
9565 if (tival != 0) {
9566 nanoseconds_to_absolutetime(tival, &ival);
9567 clock_deadline_for_periodic_event(ival, now, &deadline);
9568 (void) thread_call_enter_delayed(ifp->if_dt_tcall,
9569 deadline);
9570 } else {
9571 (void) thread_call_enter(ifp->if_dt_tcall);
9572 }
9573 }
9574 }
9575
9576 #if (DEVELOPMENT || DEBUG)
9577 /*
9578 * The sysctl variable name contains the input parameters of
9579 * ifnet_get_keepalive_offload_frames()
9580 * ifp (interface index): name[0]
9581 * frames_array_count: name[1]
9582 * frame_data_offset: name[2]
9583 * The returned data length corresponds to used_frames_count frames
9584 */
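/*
 * A hypothetical user-space invocation might look like the sketch below.
 * The MIB prefix string is a placeholder: the OID under which this handler
 * is registered is declared elsewhere, not in this file, and the interface
 * name and frame_data_offset shown are illustrative values only.
 *
 *	int mib[CTL_MAXNAME];
 *	size_t prefixlen = CTL_MAXNAME;
 *	struct ifnet_keepalive_offload_frame frames[8];
 *	size_t len = sizeof(frames);
 *
 *	sysctlnametomib("<kao_frames_oid>", mib, &prefixlen);
 *	mib[prefixlen + 0] = (int)if_nametoindex("en0");  // name[0]: ifp index
 *	mib[prefixlen + 1] = 8;		// name[1]: frames_array_count
 *	mib[prefixlen + 2] = 14;	// name[2]: frame_data_offset
 *	if (sysctl(mib, (u_int)(prefixlen + 3), frames, &len, NULL, 0) == 0) {
 *		// len / sizeof(frames[0]) == used_frames_count
 *	}
 */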
9585 static int
9586 sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
9587 {
9588 #pragma unused(oidp)
9589 int *name = (int *)arg1;
9590 u_int namelen = arg2;
9591 int idx;
9592 ifnet_t ifp = NULL;
9593 u_int32_t frames_array_count;
9594 size_t frame_data_offset;
9595 u_int32_t used_frames_count;
9596 struct ifnet_keepalive_offload_frame *frames_array = NULL;
9597 int error = 0;
9598 u_int32_t i;
9599
9600 /*
9601 * Only root is allowed to look at other people's TCP frames
9602 */
9603 error = proc_suser(current_proc());
9604 if (error != 0) {
9605 goto done;
9606 }
9607 /*
9608 * Validate the input parameters
9609 */
9610 if (req->newptr != USER_ADDR_NULL) {
9611 error = EPERM;
9612 goto done;
9613 }
9614 if (namelen != 3) {
9615 error = EINVAL;
9616 goto done;
9617 }
9618 if (req->oldptr == USER_ADDR_NULL) {
9619 error = EINVAL;
9620 goto done;
9621 }
9622 if (req->oldlen == 0) {
9623 error = EINVAL;
9624 goto done;
9625 }
9626 idx = name[0];
9627 frames_array_count = name[1];
9628 frame_data_offset = name[2];
9629
9630 /* Make sure the passed buffer is large enough */
9631 if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
9632 req->oldlen) {
9633 error = ENOMEM;
9634 goto done;
9635 }
9636
9637 ifnet_head_lock_shared();
9638 if (!IF_INDEX_IN_RANGE(idx)) {
9639 ifnet_head_done();
9640 error = ENOENT;
9641 goto done;
9642 }
9643 ifp = ifindex2ifnet[idx];
9644 ifnet_head_done();
9645
9646 frames_array = _MALLOC(frames_array_count *
9647 sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
9648 if (frames_array == NULL) {
9649 error = ENOMEM;
9650 goto done;
9651 }
9652
9653 error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
9654 frames_array_count, frame_data_offset, &used_frames_count);
9655 if (error != 0) {
9656 printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
9657 __func__, error);
9658 goto done;
9659 }
9660
9661 for (i = 0; i < used_frames_count; i++) {
9662 error = SYSCTL_OUT(req, frames_array + i,
9663 sizeof(struct ifnet_keepalive_offload_frame));
9664 if (error != 0) {
9665 goto done;
9666 }
9667 }
9668 done:
9669 if (frames_array != NULL) {
9670 _FREE(frames_array, M_TEMP);
9671 }
9672 return error;
9673 }
9674 #endif /* DEVELOPMENT || DEBUG */
9675
9676 void
9677 ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
9678 struct ifnet *ifp)
9679 {
9680 tcp_update_stats_per_flow(ifs, ifp);
9681 }
9682
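/*
 * Input mitigation thread call: when the timer fires, wake up the
 * interface's DLIL input thread if it is not already running or if
 * packets are pending on its receive queue.
 */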
9683 static void
9684 dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
9685 {
9686 #pragma unused(arg1)
9687 struct ifnet *ifp = (struct ifnet *)arg0;
9688 struct dlil_threading_info *inp = ifp->if_inp;
9689
9690 ifnet_lock_shared(ifp);
9691 if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
9692 ifnet_lock_done(ifp);
9693 return;
9694 }
9695
9696 lck_mtx_lock_spin(&inp->input_lck);
9697 inp->input_waiting |= DLIL_INPUT_WAITING;
9698 if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
9699 !qempty(&inp->rcvq_pkts)) {
9700 inp->wtot++;
9701 wakeup_one((caddr_t)&inp->input_waiting);
9702 }
9703 lck_mtx_unlock(&inp->input_lck);
9704 ifnet_lock_done(ifp);
9705 }