2 * Copyright (c) 1999-2019 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 #include <sys/kernel_types.h>
33 #include <net/kpi_interface.h>
43 * DLIL_DESC_ETYPE2 - native_type must point to 2 byte ethernet raw protocol,
44 * variants.native_type_length must be set to 2
45 * DLIL_DESC_SAP - native_type must point to 3 byte SAP protocol
46 * variants.native_type_length must be set to 3
47 * DLIL_DESC_SNAP - native_type must point to 5 byte SNAP protocol
48 * variants.native_type_length must be set to 5
50 * All protocols must be in Network byte order.
52 * Future interface families may define more protocol types they know about.
53 * The type implies the offset and context of the protocol data at native_type.
54 * The length of the protocol data specified at native_type must be set in
55 * variants.native_type_length.
/*
 * Ethernet specific types
 *
 * Each value selects how the protocol data at native_type is interpreted
 * and how long it is; the protocol data must be in network byte order.
 */
/* native_type points to a 2 byte ethernet raw protocol (native_type_length 2) */
#define DLIL_DESC_ETYPE2 4
/* native_type points to a 3 byte SAP protocol (native_type_length 3) */
#define DLIL_DESC_SAP 5
/* native_type points to a 5 byte SNAP protocol (native_type_length 5) */
#define DLIL_DESC_SNAP 6
64 #include <net/if_var.h>
65 #include <net/classq/classq.h>
66 #include <net/flowadv.h>
67 #include <sys/kern_event.h>
68 #include <kern/thread.h>
69 #include <kern/locks.h>
71 #ifdef BSD_KERNEL_PRIVATE
72 /* Operations on timespecs. */
/*
 * Reset the timespec pointed to by tvp: both tv_sec and tv_nsec become 0.
 *
 * The whole expansion is parenthesized so that the macro acts as a single
 * expression; without the parentheses an operator written next to the
 * macro call would be absorbed into the right-hand side of the assignment
 * and stored into the timespec.
 *
 * tvp is evaluated twice, so it must not have side effects.
 */
#define net_timerclear(tvp)     ((tvp)->tv_sec = (tvp)->tv_nsec = 0)
/*
 * Evaluates to 1 when the timespec pointed to by tvp has a nonzero
 * tv_sec or a nonzero tv_nsec, and to 0 when both fields are zero.
 * tv_sec is inspected first; tv_nsec is only read when tv_sec is zero.
 */
#define net_timerisset(tvp)     (!((tvp)->tv_sec == 0 && (tvp)->tv_nsec == 0))
/*
 * Compare two timespecs with the relational operator given as cmp
 * (for example <, >, ==, !=).
 *
 * When the seconds fields differ they alone decide the result;
 * otherwise the nanoseconds fields are compared with the same operator.
 */
#define net_timercmp(tvp, uvp, cmp)                             \
	(((tvp)->tv_sec != (uvp)->tv_sec) ?                     \
	    ((tvp)->tv_sec cmp (uvp)->tv_sec) :                 \
	    ((tvp)->tv_nsec cmp (uvp)->tv_nsec))
82 #define net_timeradd(tvp, uvp, vvp) do { \
83 (vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec; \
84 (vvp)->tv_nsec = (tvp)->tv_nsec + (uvp)->tv_nsec; \
85 if ((vvp)->tv_nsec >= (long)NSEC_PER_SEC) { \
87 (vvp)->tv_nsec -= NSEC_PER_SEC; \
91 #define net_timersub(tvp, uvp, vvp) do { \
92 (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \
93 (vvp)->tv_nsec = (tvp)->tv_nsec - (uvp)->tv_nsec; \
94 if ((vvp)->tv_nsec < 0) { \
96 (vvp)->tv_nsec += NSEC_PER_SEC; \
100 #define net_timerusec(tvp, nsp) do { \
101 *(nsp) = (tvp)->tv_nsec / NSEC_PER_USEC; \
102 if ((tvp)->tv_sec > 0) \
103 *(nsp) += ((tvp)->tv_sec * USEC_PER_SEC); \
106 #define net_timernsec(tvp, nsp) do { \
107 *(nsp) = (tvp)->tv_nsec; \
108 if ((tvp)->tv_sec > 0) \
109 *(nsp) += ((tvp)->tv_sec * NSEC_PER_SEC); \
112 #if defined(__x86_64__) || defined(__arm64__)
113 #define net_nsectimer(nsp, tvp) do { \
114 u_int64_t __nsp = *(nsp); \
115 net_timerclear(tvp); \
116 uint64_t __sec = __nsp / NSEC_PER_SEC; \
117 (tvp)->tv_sec = (__darwin_time_t)__sec; \
118 (tvp)->tv_nsec = (long)(__nsp - __sec * NSEC_PER_SEC); \
122 * NSEC needs to be < 2^31*10^9 to be representable in a struct timespec
123 * because __darwin_time_t is 32 bit on 32-bit platforms. This bound
124 * is < 2^61. We get a first approximation to convert into seconds using
125 * the following values.
126 * a = floor(NSEC / 2^29)
127 * inv = floor(2^61 / 10^9)
129 * The approximation of seconds is correct or too low by 1 unit.
130 * So we fix it by computing the remainder.
132 #define net_nsectimer(nsp, tvp) do { \
133 u_int64_t __nsp = *(nsp); \
134 net_timerclear(tvp); \
135 uint32_t __a = (uint32_t)(__nsp >> 29); \
136 const uint32_t __inv = 0x89705F41; \
137 uint32_t __sec = (uint32_t)(((uint64_t)__a * __inv) >> 32); \
138 uint32_t __rem = (uint32_t)(__nsp - __sec * NSEC_PER_SEC); \
139 __sec += ((__rem >= NSEC_PER_SEC) ? 1 : 0); \
140 (tvp)->tv_sec = (__darwin_time_t)__sec; \
142 (long)((__rem >= NSEC_PER_SEC) ? (__rem - NSEC_PER_SEC) : __rem); \
152 #define DLIL_THREADNAME_LEN 32
155 * DLIL input thread info
157 struct dlil_threading_info
{
158 decl_lck_mtx_data(, input_lck
);
159 lck_grp_t
*lck_grp
; /* lock group (for lock stats) */
160 u_int32_t input_waiting
; /* DLIL condition of thread */
161 u_int32_t wtot
; /* # of wakeup requests */
162 char input_name
[DLIL_THREADNAME_LEN
]; /* name storage */
163 struct ifnet
*ifp
; /* pointer to interface */
164 class_queue_t rcvq_pkts
; /* queue of pkts */
165 struct ifnet_stat_increment_param stats
; /* incremental statistics */
167 * Thread affinity (workloop and DLIL threads).
169 boolean_t net_affinity
; /* affinity set is available */
170 struct thread
*input_thr
; /* input thread */
171 struct thread
*wloop_thr
; /* workloop thread */
172 struct thread
*poll_thr
; /* poll thread */
173 u_int32_t tag
; /* affinity tag */
174 #if IFNET_INPUT_SANITY_CHK
178 u_int64_t input_mbuf_cnt
; /* total # of packets processed */
183 * DLIL input thread info (for main/loopback input thread)
185 struct dlil_main_threading_info
{
186 struct dlil_threading_info inp
;
187 class_queue_t lo_rcvq_pkts
; /* queue of lo0 pkts */
191 * The following are shared with kpi_protocol.c so that it may wakeup
192 * the input thread to run through packets queued for protocol input.
194 #define DLIL_INPUT_RUNNING 0x80000000
195 #define DLIL_INPUT_WAITING 0x40000000
196 #define DLIL_PROTO_REGISTER 0x20000000
197 #define DLIL_PROTO_WAITING 0x10000000
198 #define DLIL_INPUT_TERMINATE 0x08000000
199 #define DLIL_INPUT_TERMINATE_COMPLETE 0x04000000
202 * Flags for dlil_attach_filter()
204 #define DLIL_IFF_TSO 0x01 /* Interface filter supports TSO */
205 #define DLIL_IFF_INTERNAL 0x02 /* Apple internal -- do not count towards stats */
207 /* Input poll interval definitions */
208 #define IF_RXPOLL_INTERVALTIME_MIN (1ULL * 1000) /* 1 us */
209 #define IF_RXPOLL_INTERVALTIME (1ULL * 1000 * 1000) /* 1 ms */
211 extern int dlil_verbose
;
212 extern uint32_t hwcksum_dbg
;
213 extern uint32_t hwcksum_tx
;
214 extern uint32_t hwcksum_rx
;
215 extern struct dlil_threading_info
*dlil_main_input_thread
;
216 extern unsigned int net_rxpoll
;
217 extern uint32_t if_rxpoll
;
218 extern uint32_t if_rxpoll_decay
;
219 extern uint32_t if_rxpoll_interval_pkts
;
220 extern uint32_t if_rcvq_maxlen
;
222 extern void dlil_init(void);
224 extern errno_t
ifp_if_ioctl(struct ifnet
*, unsigned long, void *);
225 extern errno_t
ifp_if_output(struct ifnet
*, struct mbuf
*);
226 extern void ifp_if_start(struct ifnet
*);
228 extern errno_t
dlil_set_bpf_tap(ifnet_t
, bpf_tap_mode
, bpf_packet_func
);
231 * Send arp internal bypasses the check for IPv4LL.
233 extern errno_t
dlil_send_arp_internal(ifnet_t
, u_int16_t
,
234 const struct sockaddr_dl
*, const struct sockaddr
*,
235 const struct sockaddr_dl
*, const struct sockaddr
*);
238 * The following constants are used with the net_thread_mark_apply and
239 * net_thread_is_unmarked functions to control the bits in the uu_network_marks
240 * field of the uthread structure.
242 #define NET_THREAD_HELD_PF 0x1 /* thread is holding PF lock */
243 #define NET_THREAD_HELD_DOMAIN 0x2 /* thread is holding domain_proto_mtx */
244 #define NET_THREAD_CKREQ_LLADDR 0x4 /* thread reqs MACF check for LLADDR */
247 * net_thread_marks_t is a pointer to a phantom structure type used for
248 * manipulating the uthread:uu_network_marks field. As an example...
250 * static const u_int32_t bits = NET_THREAD_CKREQ_LLADDR;
251 * struct uthread *uth = get_bsdthread_info(current_thread());
253 * net_thread_marks_t marks = net_thread_marks_push(bits);
254 * VERIFY((uth->uu_network_marks & NET_THREAD_CKREQ_LLADDR) != 0);
255 * net_thread_marks_pop(marks);
257 * The net_thread_marks_push() function returns an encoding of the bits
258 * that were changed from zero to one in the uu_network_marks field. When
259 * the net_thread_marks_pop() function later processes that value, it
260 * resets the bits to their previous value.
262 * The net_thread_unmarks_push() and net_thread_unmarks_pop() functions
263 * are similar to net_thread_marks_push() and net_thread_marks_pop() except
264 * they clear the marks bits in the guarded section rather than set them.
266 * The net_thread_is_marked() and net_thread_is_unmarked() functions return
267 * the subset of the bits that are currently set or cleared (respectively)
268 * in the uthread:uu_network_marks field.
270 * Finally, the value of the net_thread_marks_none constant is provided for
271 * comparing for equality with the value returned when no bits in the marks
272 * field are changed by the push.
274 * It is not significant that a value of type net_thread_marks_t may
275 * compare as equal to the NULL pointer.
/* Phantom structure type: declared only so that a distinct pointer type exists. */
struct net_thread_marks;
/* Encoded set of marks bits changed by a push; handed back to the matching pop. */
typedef const struct net_thread_marks *net_thread_marks_t;

/* Value to compare against when a push changed no bits in the marks field. */
extern const net_thread_marks_t net_thread_marks_none;

/* Set the given bits; returns an encoding of the bits that went from zero to one. */
extern net_thread_marks_t net_thread_marks_push(u_int32_t);
/* Like net_thread_marks_push(), but clears the bits for the guarded section. */
extern net_thread_marks_t net_thread_unmarks_push(u_int32_t);
/* Restore the bits recorded by net_thread_marks_push() to their previous value. */
extern void net_thread_marks_pop(net_thread_marks_t);
/* Counterpart of net_thread_unmarks_push(). */
extern void net_thread_unmarks_pop(net_thread_marks_t);
/* Returns the subset of the given bits that are currently set. */
extern u_int32_t net_thread_is_marked(u_int32_t);
/* Returns the subset of the given bits that are currently cleared. */
extern u_int32_t net_thread_is_unmarked(u_int32_t);
289 extern int dlil_output(ifnet_t
, protocol_family_t
, mbuf_t
, void *,
290 const struct sockaddr
*, int, struct flowadv
*);
292 extern void dlil_input_packet_list(struct ifnet
*, struct mbuf
*);
293 extern void dlil_input_packet_list_extended(struct ifnet
*, struct mbuf
*,
294 u_int32_t
, ifnet_model_t
);
296 extern errno_t
dlil_resolve_multi(struct ifnet
*,
297 const struct sockaddr
*, struct sockaddr
*, size_t);
299 extern errno_t
dlil_send_arp(ifnet_t
, u_int16_t
, const struct sockaddr_dl
*,
300 const struct sockaddr
*, const struct sockaddr_dl
*,
301 const struct sockaddr
*, u_int32_t
);
303 extern int dlil_attach_filter(ifnet_t
, const struct iff_filter
*,
304 interface_filter_t
*, u_int32_t
);
305 extern void dlil_detach_filter(interface_filter_t
);
307 extern void dlil_proto_unplumb_all(ifnet_t
);
309 extern int dlil_post_msg(struct ifnet
*, u_int32_t
, u_int32_t
,
310 struct net_event_data
*, u_int32_t
);
312 extern void dlil_post_sifflags_msg(struct ifnet
*);
314 extern int dlil_post_complete_msg(struct ifnet
*, struct kev_msg
*);
316 extern int dlil_alloc_local_stats(struct ifnet
*);
318 extern void ifnet_filter_update_tso(boolean_t filter_enable
);
319 extern errno_t
dlil_rxpoll_validate_params(struct ifnet_poll_params
*);
320 extern void dlil_rxpoll_update_params(struct ifnet
*,
321 struct ifnet_poll_params
*);
322 extern void ifnet_poll(struct ifnet
*);
323 extern errno_t
ifnet_input_poll(struct ifnet
*, struct mbuf
*,
324 struct mbuf
*, const struct ifnet_stat_increment_param
*);
328 * dlil_if_acquire is obsolete. Use ifnet_allocate.
330 extern int dlil_if_acquire(u_int32_t
, const void *, size_t, const char *, struct ifnet
**);
332 * dlil_if_release is obsolete. The equivalent is called automatically when
333 * an interface is detached.
335 extern void dlil_if_release(struct ifnet
*ifp
);
337 extern errno_t
dlil_if_ref(struct ifnet
*);
338 extern errno_t
dlil_if_free(struct ifnet
*);
340 extern int dlil_node_present(struct ifnet
*, struct sockaddr
*, int32_t, int,
342 extern void dlil_node_absent(struct ifnet
*, struct sockaddr
*);
343 extern int dlil_node_present_v2(struct ifnet
*, struct sockaddr
*, struct sockaddr_dl
*, int32_t, int,
346 extern const void *dlil_ifaddr_bytes(const struct sockaddr_dl
*, size_t *,
349 extern void dlil_report_issues(struct ifnet
*, u_int8_t
[DLIL_MODIDLEN
],
350 u_int8_t
[DLIL_MODARGLEN
]);
352 #define PROTO_HASH_SLOTS 5
354 extern int proto_hash_value(u_int32_t
);
356 extern const char *dlil_kev_dl_code_str(u_int32_t
);
358 extern errno_t
dlil_rxpoll_set_params(struct ifnet
*,
359 struct ifnet_poll_params
*, boolean_t
);
360 extern errno_t
dlil_rxpoll_get_params(struct ifnet
*,
361 struct ifnet_poll_params
*);
363 extern errno_t
dlil_output_handler(struct ifnet
*, struct mbuf
*);
364 extern errno_t
dlil_input_handler(struct ifnet
*, struct mbuf
*,
365 struct mbuf
*, const struct ifnet_stat_increment_param
*,
366 boolean_t
, struct thread
*);
370 * This is mostly called from the context of the DLIL input thread;
371 * because of that there is no need for atomic operations.
373 __attribute__((always_inline
))
375 ifp_inc_traffic_class_in(struct ifnet
*ifp
, struct mbuf
*m
)
377 if (!(m
->m_flags
& M_PKTHDR
)) {
381 switch (m_get_traffic_class(m
)) {
383 ifp
->if_tc
.ifi_ibepackets
++;
384 ifp
->if_tc
.ifi_ibebytes
+= m
->m_pkthdr
.len
;
387 ifp
->if_tc
.ifi_ibkpackets
++;
388 ifp
->if_tc
.ifi_ibkbytes
+= m
->m_pkthdr
.len
;
391 ifp
->if_tc
.ifi_ivipackets
++;
392 ifp
->if_tc
.ifi_ivibytes
+= m
->m_pkthdr
.len
;
395 ifp
->if_tc
.ifi_ivopackets
++;
396 ifp
->if_tc
.ifi_ivobytes
+= m
->m_pkthdr
.len
;
402 if (mbuf_is_traffic_class_privileged(m
)) {
403 ifp
->if_tc
.ifi_ipvpackets
++;
404 ifp
->if_tc
.ifi_ipvbytes
+= m
->m_pkthdr
.len
;
409 * This is called from DLIL output, hence multiple threads could end
 * up modifying the statistics. We trade off accuracy for performance
411 * by not using atomic operations here.
413 __attribute__((always_inline
))
415 ifp_inc_traffic_class_out(struct ifnet
*ifp
, struct mbuf
*m
)
417 if (!(m
->m_flags
& M_PKTHDR
)) {
421 switch (m_get_traffic_class(m
)) {
423 ifp
->if_tc
.ifi_obepackets
++;
424 ifp
->if_tc
.ifi_obebytes
+= m
->m_pkthdr
.len
;
427 ifp
->if_tc
.ifi_obkpackets
++;
428 ifp
->if_tc
.ifi_obkbytes
+= m
->m_pkthdr
.len
;
431 ifp
->if_tc
.ifi_ovipackets
++;
432 ifp
->if_tc
.ifi_ovibytes
+= m
->m_pkthdr
.len
;
435 ifp
->if_tc
.ifi_ovopackets
++;
436 ifp
->if_tc
.ifi_ovobytes
+= m
->m_pkthdr
.len
;
442 if (mbuf_is_traffic_class_privileged(m
)) {
443 ifp
->if_tc
.ifi_opvpackets
++;
444 ifp
->if_tc
.ifi_opvbytes
+= m
->m_pkthdr
.len
;
447 #endif /* BSD_KERNEL_PRIVATE */
448 #endif /* KERNEL_PRIVATE */