2 * Copyright (c) 2012-2020 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
30 #include <sys/systm.h>
31 #include <sys/kern_control.h>
32 #include <net/kpi_protocol.h>
33 #include <net/kpi_interface.h>
34 #include <sys/socket.h>
35 #include <sys/socketvar.h>
37 #include <net/if_types.h>
39 #include <net/if_ipsec.h>
41 #include <sys/sockio.h>
42 #include <netinet/in.h>
43 #include <netinet/ip6.h>
44 #include <netinet6/in6_var.h>
45 #include <netinet6/ip6_var.h>
46 #include <sys/kauth.h>
47 #include <netinet6/ipsec.h>
48 #include <netinet6/ipsec6.h>
49 #include <netinet6/esp.h>
50 #include <netinet6/esp6.h>
51 #include <netinet/ip.h>
52 #include <net/flowadv.h>
54 #include <netkey/key.h>
55 #include <net/pktap.h>
56 #include <kern/zalloc.h>
61 extern int net_qos_policy_restricted
;
62 extern int net_qos_policy_restrict_avapps
;
64 /* Kernel Control functions */
65 static errno_t
ipsec_ctl_setup(u_int32_t
*unit
, void **unitinfo
);
66 static errno_t
ipsec_ctl_bind(kern_ctl_ref kctlref
, struct sockaddr_ctl
*sac
,
68 static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref
, struct sockaddr_ctl
*sac
,
70 static errno_t
ipsec_ctl_disconnect(kern_ctl_ref kctlref
, u_int32_t unit
,
72 static errno_t
ipsec_ctl_send(kern_ctl_ref kctlref
, u_int32_t unit
,
73 void *unitinfo
, mbuf_t m
, int flags
);
74 static errno_t
ipsec_ctl_getopt(kern_ctl_ref kctlref
, u_int32_t unit
, void *unitinfo
,
75 int opt
, void *data
, size_t *len
);
76 static errno_t
ipsec_ctl_setopt(kern_ctl_ref kctlref
, u_int32_t unit
, void *unitinfo
,
77 int opt
, void *data
, size_t len
);
79 /* Network Interface functions */
80 static void ipsec_start(ifnet_t interface
);
81 static errno_t
ipsec_output(ifnet_t interface
, mbuf_t data
);
82 static errno_t
ipsec_demux(ifnet_t interface
, mbuf_t data
, char *frame_header
,
83 protocol_family_t
*protocol
);
84 static errno_t
ipsec_add_proto(ifnet_t interface
, protocol_family_t protocol
,
85 const struct ifnet_demux_desc
*demux_array
,
86 u_int32_t demux_count
);
87 static errno_t
ipsec_del_proto(ifnet_t interface
, protocol_family_t protocol
);
88 static errno_t
ipsec_ioctl(ifnet_t interface
, u_long cmd
, void *data
);
89 static void ipsec_detached(ifnet_t interface
);
91 /* Protocol handlers */
92 static errno_t
ipsec_attach_proto(ifnet_t interface
, protocol_family_t proto
);
93 static errno_t
ipsec_proto_input(ifnet_t interface
, protocol_family_t protocol
,
94 mbuf_t m
, char *frame_header
);
95 static errno_t
ipsec_proto_pre_output(ifnet_t interface
, protocol_family_t protocol
,
96 mbuf_t
*packet
, const struct sockaddr
*dest
, void *route
,
97 char *frame_type
, char *link_layer_dest
);
99 static kern_ctl_ref ipsec_kctlref
;
100 static lck_attr_t
*ipsec_lck_attr
;
101 static lck_grp_attr_t
*ipsec_lck_grp_attr
;
102 static lck_grp_t
*ipsec_lck_grp
;
103 static lck_mtx_t ipsec_lock
;
107 SYSCTL_DECL(_net_ipsec
);
108 SYSCTL_NODE(_net
, OID_AUTO
, ipsec
, CTLFLAG_RW
| CTLFLAG_LOCKED
, 0, "IPsec");
109 static int if_ipsec_verify_interface_creation
= 0;
110 SYSCTL_INT(_net_ipsec
, OID_AUTO
, verify_interface_creation
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_ipsec_verify_interface_creation
, 0, "");
112 #define IPSEC_IF_VERIFY(_e) if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }
114 #define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
115 #define IPSEC_IF_DEFAULT_RING_SIZE 64
116 #define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
117 #define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
118 #define IPSEC_IF_DEFAULT_BUF_SEG_SIZE skmem_usr_buf_seg_size
120 #define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
121 #define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
122 #define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
123 #define IPSEC_NETIF_WMM_RX_RING_COUNT 1
124 #define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
125 #define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT
127 #define IPSEC_IF_MIN_RING_SIZE 8
128 #define IPSEC_IF_MAX_RING_SIZE 1024
130 #define IPSEC_IF_MIN_SLOT_SIZE 1024
131 #define IPSEC_IF_MAX_SLOT_SIZE 4096
133 #define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512
135 static int if_ipsec_max_pending_input
= IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT
;
137 static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
;
138 static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
;
139 static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
;
141 static int if_ipsec_ring_size
= IPSEC_IF_DEFAULT_RING_SIZE
;
142 static int if_ipsec_tx_fsw_ring_size
= IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE
;
143 static int if_ipsec_rx_fsw_ring_size
= IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE
;
145 SYSCTL_INT(_net_ipsec
, OID_AUTO
, max_pending_input
, CTLFLAG_LOCKED
| CTLFLAG_RW
, &if_ipsec_max_pending_input
, 0, "");
146 SYSCTL_PROC(_net_ipsec
, OID_AUTO
, ring_size
, CTLTYPE_INT
| CTLFLAG_LOCKED
| CTLFLAG_RW
,
147 &if_ipsec_ring_size
, IPSEC_IF_DEFAULT_RING_SIZE
, &sysctl_if_ipsec_ring_size
, "I", "");
148 SYSCTL_PROC(_net_ipsec
, OID_AUTO
, tx_fsw_ring_size
, CTLTYPE_INT
| CTLFLAG_LOCKED
| CTLFLAG_RW
,
149 &if_ipsec_tx_fsw_ring_size
, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE
, &sysctl_if_ipsec_tx_fsw_ring_size
, "I", "");
150 SYSCTL_PROC(_net_ipsec
, OID_AUTO
, rx_fsw_ring_size
, CTLTYPE_INT
| CTLFLAG_LOCKED
| CTLFLAG_RW
,
151 &if_ipsec_rx_fsw_ring_size
, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE
, &sysctl_if_ipsec_rx_fsw_ring_size
, "I", "");
153 static int if_ipsec_debug
= 0;
154 SYSCTL_INT(_net_ipsec
, OID_AUTO
, debug
, CTLFLAG_LOCKED
| CTLFLAG_RW
, &if_ipsec_debug
, 0, "");
157 ipsec_register_nexus(void);
159 typedef struct ipsec_nx
{
169 static nexus_controller_t ipsec_ncd
;
170 static int ipsec_ncd_refcount
;
171 static uuid_t ipsec_kpipe_uuid
;
173 #endif // IPSEC_NEXUS
175 /* Control block allocated for each kernel control connection */
177 TAILQ_ENTRY(ipsec_pcb
) ipsec_chain
;
178 kern_ctl_ref ipsec_ctlref
;
180 u_int32_t ipsec_unit
;
181 u_int32_t ipsec_unique_id
;
182 // These external flags can be set with IPSEC_OPT_FLAGS
183 u_int32_t ipsec_external_flags
;
184 // These internal flags are only used within this driver
185 u_int32_t ipsec_internal_flags
;
186 u_int32_t ipsec_input_frag_size
;
187 bool ipsec_frag_size_set
;
188 int ipsec_ext_ifdata_stats
;
189 mbuf_svc_class_t ipsec_output_service_class
;
190 char ipsec_if_xname
[IFXNAMSIZ
];
191 char ipsec_unique_name
[IFXNAMSIZ
];
192 // PCB lock protects state fields, like ipsec_kpipe_count
193 decl_lck_rw_data(, ipsec_pcb_lock
);
194 // lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
195 decl_lck_mtx_data(, ipsec_pcb_data_move_lock
);
196 u_int32_t ipsec_pcb_data_move
; /* number of data moving contexts */
197 u_int32_t ipsec_pcb_drainers
; /* number of threads waiting to drain */
198 u_int32_t ipsec_pcb_data_path_state
; /* internal state of interface data path */
201 lck_mtx_t ipsec_input_chain_lock
;
202 lck_mtx_t ipsec_kpipe_encrypt_lock
;
203 lck_mtx_t ipsec_kpipe_decrypt_lock
;
204 struct mbuf
* ipsec_input_chain
;
205 struct mbuf
* ipsec_input_chain_last
;
206 u_int32_t ipsec_input_chain_count
;
207 // Input chain lock protects the list of input mbufs
208 // The input chain lock must be taken AFTER the PCB lock if both are held
209 struct ipsec_nx ipsec_nx
;
210 u_int32_t ipsec_kpipe_count
;
211 pid_t ipsec_kpipe_pid
;
212 uuid_t ipsec_kpipe_uuid
[IPSEC_IF_MAX_RING_COUNT
];
213 void * ipsec_kpipe_rxring
[IPSEC_IF_MAX_RING_COUNT
];
214 void * ipsec_kpipe_txring
[IPSEC_IF_MAX_RING_COUNT
];
215 kern_pbufpool_t ipsec_kpipe_pp
;
216 u_int32_t ipsec_kpipe_tx_ring_size
;
217 u_int32_t ipsec_kpipe_rx_ring_size
;
219 kern_nexus_t ipsec_netif_nexus
;
220 kern_pbufpool_t ipsec_netif_pp
;
221 void * ipsec_netif_rxring
[IPSEC_NETIF_MAX_RX_RING_COUNT
];
222 void * ipsec_netif_txring
[IPSEC_NETIF_MAX_TX_RING_COUNT
];
223 uint64_t ipsec_netif_txring_size
;
225 u_int32_t ipsec_slot_size
;
226 u_int32_t ipsec_netif_ring_size
;
227 u_int32_t ipsec_tx_fsw_ring_size
;
228 u_int32_t ipsec_rx_fsw_ring_size
;
229 bool ipsec_use_netif
;
230 bool ipsec_needs_netagent
;
231 #endif // IPSEC_NEXUS
234 /* These are internal flags not exposed outside this file */
235 #define IPSEC_FLAGS_KPIPE_ALLOCATED 1
237 /* data movement refcounting functions */
238 static void ipsec_wait_data_move_drain(struct ipsec_pcb
*pcb
);
240 /* Data path states */
241 #define IPSEC_PCB_DATA_PATH_READY 0x1
243 /* Macros to set/clear/test data path states */
244 #define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
245 #define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
246 #define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)
249 /* Macros to clear/set/test flags. */
251 ipsec_flag_set(struct ipsec_pcb
*pcb
, uint32_t flag
)
253 pcb
->ipsec_internal_flags
|= flag
;
256 ipsec_flag_clr(struct ipsec_pcb
*pcb
, uint32_t flag
)
258 pcb
->ipsec_internal_flags
&= ~flag
;
262 ipsec_flag_isset(struct ipsec_pcb
*pcb
, uint32_t flag
)
264 return !!(pcb
->ipsec_internal_flags
& flag
);
266 #endif // IPSEC_NEXUS
268 TAILQ_HEAD(ipsec_list
, ipsec_pcb
) ipsec_head
;
270 static ZONE_DECLARE(ipsec_pcb_zone
, "net.if_ipsec",
271 sizeof(struct ipsec_pcb
), ZC_ZFREE_CLEARMEM
);
273 #define IPSECQ_MAXLEN 256
277 sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
279 #pragma unused(arg1, arg2)
280 int value
= if_ipsec_ring_size
;
282 int error
= sysctl_handle_int(oidp
, &value
, 0, req
);
283 if (error
|| !req
->newptr
) {
287 if (value
< IPSEC_IF_MIN_RING_SIZE
||
288 value
> IPSEC_IF_MAX_RING_SIZE
) {
292 if_ipsec_ring_size
= value
;
298 sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
300 #pragma unused(arg1, arg2)
301 int value
= if_ipsec_tx_fsw_ring_size
;
303 int error
= sysctl_handle_int(oidp
, &value
, 0, req
);
304 if (error
|| !req
->newptr
) {
308 if (value
< IPSEC_IF_MIN_RING_SIZE
||
309 value
> IPSEC_IF_MAX_RING_SIZE
) {
313 if_ipsec_tx_fsw_ring_size
= value
;
319 sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
321 #pragma unused(arg1, arg2)
322 int value
= if_ipsec_rx_fsw_ring_size
;
324 int error
= sysctl_handle_int(oidp
, &value
, 0, req
);
325 if (error
|| !req
->newptr
) {
329 if (value
< IPSEC_IF_MIN_RING_SIZE
||
330 value
> IPSEC_IF_MAX_RING_SIZE
) {
334 if_ipsec_rx_fsw_ring_size
= value
;
341 ipsec_in_wmm_mode(struct ipsec_pcb
*pcb
)
343 return pcb
->ipsec_kpipe_count
== IPSEC_IF_WMM_RING_COUNT
;
346 #endif // IPSEC_NEXUS
349 ipsec_register_control(void)
351 struct kern_ctl_reg kern_ctl
;
355 ipsec_register_nexus();
356 #endif // IPSEC_NEXUS
358 TAILQ_INIT(&ipsec_head
);
360 bzero(&kern_ctl
, sizeof(kern_ctl
));
361 strlcpy(kern_ctl
.ctl_name
, IPSEC_CONTROL_NAME
, sizeof(kern_ctl
.ctl_name
));
362 kern_ctl
.ctl_name
[sizeof(kern_ctl
.ctl_name
) - 1] = 0;
363 kern_ctl
.ctl_flags
= CTL_FLAG_PRIVILEGED
| CTL_FLAG_REG_SETUP
; /* Require root */
364 kern_ctl
.ctl_sendsize
= 64 * 1024;
365 kern_ctl
.ctl_recvsize
= 64 * 1024;
366 kern_ctl
.ctl_setup
= ipsec_ctl_setup
;
367 kern_ctl
.ctl_bind
= ipsec_ctl_bind
;
368 kern_ctl
.ctl_connect
= ipsec_ctl_connect
;
369 kern_ctl
.ctl_disconnect
= ipsec_ctl_disconnect
;
370 kern_ctl
.ctl_send
= ipsec_ctl_send
;
371 kern_ctl
.ctl_setopt
= ipsec_ctl_setopt
;
372 kern_ctl
.ctl_getopt
= ipsec_ctl_getopt
;
374 result
= ctl_register(&kern_ctl
, &ipsec_kctlref
);
376 os_log_error(OS_LOG_DEFAULT
, "ipsec_register_control - ctl_register failed: %d\n", result
);
380 /* Register the protocol plumbers */
381 if ((result
= proto_register_plumber(PF_INET
, IFNET_FAMILY_IPSEC
,
382 ipsec_attach_proto
, NULL
)) != 0) {
383 os_log_error(OS_LOG_DEFAULT
, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
385 ctl_deregister(ipsec_kctlref
);
389 /* Register the protocol plumbers */
390 if ((result
= proto_register_plumber(PF_INET6
, IFNET_FAMILY_IPSEC
,
391 ipsec_attach_proto
, NULL
)) != 0) {
392 proto_unregister_plumber(PF_INET
, IFNET_FAMILY_IPSEC
);
393 ctl_deregister(ipsec_kctlref
);
394 os_log_error(OS_LOG_DEFAULT
, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
399 ipsec_lck_attr
= lck_attr_alloc_init();
400 ipsec_lck_grp_attr
= lck_grp_attr_alloc_init();
401 ipsec_lck_grp
= lck_grp_alloc_init("ipsec", ipsec_lck_grp_attr
);
402 lck_mtx_init(&ipsec_lock
, ipsec_lck_grp
, ipsec_lck_attr
);
409 ipsec_interface_isvalid(ifnet_t interface
)
411 struct ipsec_pcb
*pcb
= NULL
;
413 if (interface
== NULL
) {
417 pcb
= ifnet_softc(interface
);
423 /* When ctl disconnects, ipsec_unit is set to 0 */
424 if (pcb
->ipsec_unit
== 0) {
433 ipsec_interface_needs_netagent(ifnet_t interface
)
435 struct ipsec_pcb
*pcb
= NULL
;
437 if (interface
== NULL
) {
441 pcb
= ifnet_softc(interface
);
447 return pcb
->ipsec_needs_netagent
== true;
449 #endif // IPSEC_NEXUS
452 ipsec_ifnet_set_attrs(ifnet_t ifp
)
454 /* Set flags and additional information. */
455 ifnet_set_mtu(ifp
, 1500);
456 ifnet_set_flags(ifp
, IFF_UP
| IFF_MULTICAST
| IFF_POINTOPOINT
, 0xffff);
458 /* The interface must generate its own IPv6 LinkLocal address,
459 * if possible following the recommendation of RFC2472 to the 64bit interface ID
461 ifnet_set_eflags(ifp
, IFEF_NOAUTOIPV6LL
, IFEF_NOAUTOIPV6LL
);
464 /* Reset the stats in case as the interface may have been recycled */
465 struct ifnet_stats_param stats
;
466 bzero(&stats
, sizeof(struct ifnet_stats_param
));
467 ifnet_set_stat(ifp
, &stats
);
468 #endif // !IPSEC_NEXUS
475 static uuid_t ipsec_nx_dom_prov
;
478 ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov
)
484 ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov
)
490 ipsec_register_nexus(void)
492 const struct kern_nexus_domain_provider_init dp_init
= {
493 .nxdpi_version
= KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION
,
495 .nxdpi_init
= ipsec_nxdp_init
,
496 .nxdpi_fini
= ipsec_nxdp_fini
500 /* ipsec_nxdp_init() is called before this function returns */
501 err
= kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF
,
502 (const uint8_t *) "com.apple.ipsec",
503 &dp_init
, sizeof(dp_init
),
506 os_log_error(OS_LOG_DEFAULT
, "%s: failed to register domain provider\n", __func__
);
513 ipsec_netif_prepare(kern_nexus_t nexus
, ifnet_t ifp
)
515 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
516 pcb
->ipsec_netif_nexus
= nexus
;
517 return ipsec_ifnet_set_attrs(ifp
);
521 ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov
,
522 proc_t p
, kern_nexus_t nexus
,
523 nexus_port_t nexus_port
, kern_channel_t channel
, void **ch_ctx
)
525 #pragma unused(nxprov, p)
526 #pragma unused(nexus, nexus_port, channel, ch_ctx)
531 ipsec_nexus_connected(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
532 kern_channel_t channel
)
534 #pragma unused(nxprov, channel)
535 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
536 boolean_t ok
= ifnet_is_attached(pcb
->ipsec_ifp
, 1);
537 /* Mark the data path as ready */
539 lck_mtx_lock(&pcb
->ipsec_pcb_data_move_lock
);
540 IPSEC_SET_DATA_PATH_READY(pcb
);
541 lck_mtx_unlock(&pcb
->ipsec_pcb_data_move_lock
);
543 return ok
? 0 : ENXIO
;
547 ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
548 kern_channel_t channel
)
550 #pragma unused(nxprov, channel)
551 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
553 VERIFY(pcb
->ipsec_kpipe_count
!= 0);
555 /* Wait until all threads in the data paths are done. */
556 ipsec_wait_data_move_drain(pcb
);
560 ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
561 kern_channel_t channel
)
563 #pragma unused(nxprov, channel)
564 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
566 /* Wait until all threads in the data paths are done. */
567 ipsec_wait_data_move_drain(pcb
);
571 ipsec_nexus_disconnected(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
572 kern_channel_t channel
)
574 #pragma unused(nxprov, channel)
575 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
576 if (pcb
->ipsec_netif_nexus
== nexus
) {
577 pcb
->ipsec_netif_nexus
= NULL
;
579 ifnet_decr_iorefcnt(pcb
->ipsec_ifp
);
583 ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
584 kern_channel_t channel
, kern_channel_ring_t ring
, boolean_t is_tx_ring
,
587 #pragma unused(nxprov)
588 #pragma unused(channel)
589 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
592 for (ring_idx
= 0; ring_idx
< pcb
->ipsec_kpipe_count
; ring_idx
++) {
593 if (!uuid_compare(channel
->ch_info
->cinfo_nx_uuid
, pcb
->ipsec_kpipe_uuid
[ring_idx
])) {
598 if (ring_idx
== pcb
->ipsec_kpipe_count
) {
599 uuid_string_t uuidstr
;
600 uuid_unparse(channel
->ch_info
->cinfo_nx_uuid
, uuidstr
);
601 os_log_error(OS_LOG_DEFAULT
, "%s: %s cannot find channel %s\n", __func__
, pcb
->ipsec_if_xname
, uuidstr
);
605 *ring_ctx
= (void *)(uintptr_t)ring_idx
;
608 VERIFY(pcb
->ipsec_kpipe_rxring
[ring_idx
] == NULL
);
609 pcb
->ipsec_kpipe_rxring
[ring_idx
] = ring
;
611 VERIFY(pcb
->ipsec_kpipe_txring
[ring_idx
] == NULL
);
612 pcb
->ipsec_kpipe_txring
[ring_idx
] = ring
;
618 ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
619 kern_channel_ring_t ring
)
621 #pragma unused(nxprov)
623 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
625 for (unsigned int i
= 0; i
< pcb
->ipsec_kpipe_count
; i
++) {
626 if (pcb
->ipsec_kpipe_rxring
[i
] == ring
) {
627 pcb
->ipsec_kpipe_rxring
[i
] = NULL
;
629 } else if (pcb
->ipsec_kpipe_txring
[i
] == ring
) {
630 pcb
->ipsec_kpipe_txring
[i
] = NULL
;
638 ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
639 kern_channel_ring_t tx_ring
, uint32_t flags
)
641 #pragma unused(nxprov)
642 #pragma unused(flags)
643 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
645 if (!ipsec_data_move_begin(pcb
)) {
646 os_log_info(OS_LOG_DEFAULT
, "%s: data path stopped for %s\n", __func__
, if_name(pcb
->ipsec_ifp
));
650 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
652 if (!ipsec_flag_isset(pcb
, IPSEC_FLAGS_KPIPE_ALLOCATED
)) {
653 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
654 ipsec_data_move_end(pcb
);
658 VERIFY(pcb
->ipsec_kpipe_count
);
660 kern_channel_slot_t tx_slot
= kern_channel_get_next_slot(tx_ring
, NULL
, NULL
);
661 if (tx_slot
== NULL
) {
662 // Nothing to write, bail
663 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
664 ipsec_data_move_end(pcb
);
668 // Signal the netif ring to read
669 kern_channel_ring_t rx_ring
= pcb
->ipsec_netif_rxring
[0];
670 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
672 if (rx_ring
!= NULL
) {
673 kern_channel_notify(rx_ring
, 0);
676 ipsec_data_move_end(pcb
);
681 ipsec_encrypt_mbuf(ifnet_t interface
,
684 struct ipsec_output_state ipsec_state
;
688 // Make sure this packet isn't looping through the interface
689 if (necp_get_last_interface_index_from_packet(data
) == interface
->if_index
) {
691 goto ipsec_output_err
;
694 // Mark the interface so NECP can evaluate tunnel policy
695 necp_mark_packet_from_interface(data
, interface
);
697 struct ip
*ip
= mtod(data
, struct ip
*);
698 u_int ip_version
= ip
->ip_v
;
700 switch (ip_version
) {
704 memset(&ipsec_state
, 0, sizeof(ipsec_state
));
705 ipsec_state
.m
= data
;
706 ipsec_state
.dst
= (struct sockaddr
*)&ip
->ip_dst
;
707 memset(&ipsec_state
.ro
, 0, sizeof(ipsec_state
.ro
));
709 error
= ipsec4_interface_output(&ipsec_state
, interface
);
710 if (error
== 0 && ipsec_state
.tunneled
== 6) {
711 // Tunneled in IPv6 - packet is gone
712 // TODO: Don't lose mbuf
717 data
= ipsec_state
.m
;
718 if (error
|| data
== NULL
) {
720 os_log_error(OS_LOG_DEFAULT
, "ipsec_encrypt_mbuf: ipsec4_output error %d\n", error
);
722 goto ipsec_output_err
;
729 data
= ipsec6_splithdr(data
);
731 os_log_error(OS_LOG_DEFAULT
, "ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
732 goto ipsec_output_err
;
735 struct ip6_hdr
*ip6
= mtod(data
, struct ip6_hdr
*);
737 memset(&ipsec_state
, 0, sizeof(ipsec_state
));
738 ipsec_state
.m
= data
;
739 ipsec_state
.dst
= (struct sockaddr
*)&ip6
->ip6_dst
;
740 memset(&ipsec_state
.ro
, 0, sizeof(ipsec_state
.ro
));
742 error
= ipsec6_interface_output(&ipsec_state
, interface
, &ip6
->ip6_nxt
, ipsec_state
.m
);
743 if (error
== 0 && ipsec_state
.tunneled
== 4) {
744 // Tunneled in IPv4 - packet is gone
745 // TODO: Don't lose mbuf
749 data
= ipsec_state
.m
;
750 if (error
|| data
== NULL
) {
752 os_log_error(OS_LOG_DEFAULT
, "ipsec_encrypt_mbuf: ipsec6_output error %d\n", error
);
754 goto ipsec_output_err
;
759 os_log_error(OS_LOG_DEFAULT
, "ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version
);
761 goto ipsec_output_err
;
776 ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
777 kern_channel_ring_t rx_ring
, uint32_t flags
)
779 #pragma unused(nxprov)
780 #pragma unused(flags)
781 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
782 struct kern_channel_ring_stat_increment rx_ring_stats
;
783 uint8_t ring_idx
= (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring
);
785 if (!ipsec_data_move_begin(pcb
)) {
786 os_log_error(OS_LOG_DEFAULT
, "%s: data path stopped for %s\n", __func__
, if_name(pcb
->ipsec_ifp
));
790 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
792 if (!ipsec_flag_isset(pcb
, IPSEC_FLAGS_KPIPE_ALLOCATED
)) {
793 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
794 ipsec_data_move_end(pcb
);
798 VERIFY(pcb
->ipsec_kpipe_count
);
799 VERIFY(ring_idx
<= pcb
->ipsec_kpipe_count
);
801 // Reclaim user-released slots
802 (void) kern_channel_reclaim(rx_ring
);
804 uint32_t avail
= kern_channel_available_slot_count(rx_ring
);
806 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
807 os_log_error(OS_LOG_DEFAULT
, "%s: %s ring %s index %d no room in rx_ring\n", __func__
,
808 pcb
->ipsec_if_xname
, rx_ring
->ckr_name
, ring_idx
);
809 ipsec_data_move_end(pcb
);
813 kern_channel_ring_t tx_ring
= pcb
->ipsec_netif_txring
[ring_idx
];
814 if (tx_ring
== NULL
) {
815 // Net-If TX ring not set up yet, nothing to read
816 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
817 os_log_error(OS_LOG_DEFAULT
, "%s: %s ring %s index %d bad netif_txring 1\n", __func__
,
818 pcb
->ipsec_if_xname
, rx_ring
->ckr_name
, ring_idx
);
819 ipsec_data_move_end(pcb
);
823 struct netif_stats
*nifs
= &NX_NETIF_PRIVATE(pcb
->ipsec_netif_nexus
)->nif_stats
;
825 // Unlock ipsec before entering ring
826 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
828 (void)kr_enter(tx_ring
, TRUE
);
830 // Lock again after entering and validate
831 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
832 if (tx_ring
!= pcb
->ipsec_netif_txring
[ring_idx
]) {
833 // Ring no longer valid
834 // Unlock first, then exit ring
835 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
837 os_log_error(OS_LOG_DEFAULT
, "%s: %s ring %s index %d bad netif_txring 2\n", __func__
,
838 pcb
->ipsec_if_xname
, rx_ring
->ckr_name
, ring_idx
);
839 ipsec_data_move_end(pcb
);
843 struct kern_channel_ring_stat_increment tx_ring_stats
;
844 bzero(&tx_ring_stats
, sizeof(tx_ring_stats
));
845 kern_channel_slot_t tx_pslot
= NULL
;
846 kern_channel_slot_t tx_slot
= kern_channel_get_next_slot(tx_ring
, NULL
, NULL
);
847 if (tx_slot
== NULL
) {
848 // Nothing to read, don't bother signalling
849 // Unlock first, then exit ring
850 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
852 ipsec_data_move_end(pcb
);
856 struct kern_pbufpool
*rx_pp
= rx_ring
->ckr_pp
;
857 VERIFY(rx_pp
!= NULL
);
858 bzero(&rx_ring_stats
, sizeof(rx_ring_stats
));
859 kern_channel_slot_t rx_pslot
= NULL
;
860 kern_channel_slot_t rx_slot
= kern_channel_get_next_slot(rx_ring
, NULL
, NULL
);
862 while (rx_slot
!= NULL
&& tx_slot
!= NULL
) {
867 // Allocate rx packet
868 kern_packet_t rx_ph
= 0;
869 error
= kern_pbufpool_alloc_nosleep(rx_pp
, 1, &rx_ph
);
870 if (__improbable(error
!= 0)) {
871 os_log_error(OS_LOG_DEFAULT
, "ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
872 pcb
->ipsec_ifp
->if_xname
);
876 kern_packet_t tx_ph
= kern_channel_slot_get_packet(tx_ring
, tx_slot
);
880 tx_slot
= kern_channel_get_next_slot(tx_ring
, tx_slot
, NULL
);
883 kern_pbufpool_free(rx_pp
, rx_ph
);
887 kern_buflet_t tx_buf
= kern_packet_get_next_buflet(tx_ph
, NULL
);
888 VERIFY(tx_buf
!= NULL
);
889 uint8_t *tx_baddr
= kern_buflet_get_data_address(tx_buf
);
890 VERIFY(tx_baddr
!= NULL
);
891 tx_baddr
+= kern_buflet_get_data_offset(tx_buf
);
893 bpf_tap_packet_out(pcb
->ipsec_ifp
, DLT_RAW
, tx_ph
, NULL
, 0);
895 length
= MIN(kern_packet_get_data_length(tx_ph
),
896 pcb
->ipsec_slot_size
);
898 // Increment TX stats
899 tx_ring_stats
.kcrsi_slots_transferred
++;
900 tx_ring_stats
.kcrsi_bytes_transferred
+= length
;
903 error
= mbuf_gethdr(MBUF_DONTWAIT
, MBUF_TYPE_HEADER
, &data
);
905 error
= mbuf_copyback(data
, 0, length
, tx_baddr
, MBUF_DONTWAIT
);
907 // Encrypt and send packet
908 lck_mtx_lock(&pcb
->ipsec_kpipe_encrypt_lock
);
909 data
= ipsec_encrypt_mbuf(pcb
->ipsec_ifp
, data
);
910 lck_mtx_unlock(&pcb
->ipsec_kpipe_encrypt_lock
);
912 os_log_error(OS_LOG_DEFAULT
, "ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb
->ipsec_ifp
->if_xname
, length
, error
);
913 STATS_INC(nifs
, NETIF_STATS_DROP_NOMEM_MBUF
);
914 STATS_INC(nifs
, NETIF_STATS_DROP
);
919 os_log_error(OS_LOG_DEFAULT
, "ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb
->ipsec_ifp
->if_xname
, error
);
920 STATS_INC(nifs
, NETIF_STATS_DROP_NOMEM_MBUF
);
921 STATS_INC(nifs
, NETIF_STATS_DROP
);
924 os_log_error(OS_LOG_DEFAULT
, "ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb
->ipsec_ifp
->if_xname
);
925 STATS_INC(nifs
, NETIF_STATS_DROP_BADLEN
);
926 STATS_INC(nifs
, NETIF_STATS_DROP
);
930 os_log_error(OS_LOG_DEFAULT
, "ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb
->ipsec_ifp
->if_xname
);
931 kern_pbufpool_free(rx_pp
, rx_ph
);
935 length
= mbuf_pkthdr_len(data
);
936 if (length
> rx_pp
->pp_buflet_size
) {
939 kern_pbufpool_free(rx_pp
, rx_ph
);
940 os_log_error(OS_LOG_DEFAULT
, "ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
941 pcb
->ipsec_ifp
->if_xname
, length
, rx_pp
->pp_buflet_size
);
946 kern_buflet_t rx_buf
= kern_packet_get_next_buflet(rx_ph
, NULL
);
947 VERIFY(rx_buf
!= NULL
);
948 void *rx_baddr
= kern_buflet_get_data_address(rx_buf
);
949 VERIFY(rx_baddr
!= NULL
);
951 // Copy-in data from mbuf to buflet
952 mbuf_copydata(data
, 0, length
, (void *)rx_baddr
);
953 kern_packet_clear_flow_uuid(rx_ph
); // Zero flow id
955 // Finalize and attach the packet
956 error
= kern_buflet_set_data_offset(rx_buf
, 0);
958 error
= kern_buflet_set_data_length(rx_buf
, length
);
960 error
= kern_packet_finalize(rx_ph
);
962 error
= kern_channel_slot_attach_packet(rx_ring
, rx_slot
, rx_ph
);
965 STATS_INC(nifs
, NETIF_STATS_TX_PACKETS
);
966 STATS_INC(nifs
, NETIF_STATS_TX_COPY_DIRECT
);
968 rx_ring_stats
.kcrsi_slots_transferred
++;
969 rx_ring_stats
.kcrsi_bytes_transferred
+= length
;
971 if (!pcb
->ipsec_ext_ifdata_stats
) {
972 ifnet_stat_increment_out(pcb
->ipsec_ifp
, 1, length
, 0);
978 rx_slot
= kern_channel_get_next_slot(rx_ring
, rx_slot
, NULL
);
982 kern_channel_advance_slot(rx_ring
, rx_pslot
);
983 kern_channel_increment_ring_net_stats(rx_ring
, pcb
->ipsec_ifp
, &rx_ring_stats
);
987 kern_channel_advance_slot(tx_ring
, tx_pslot
);
988 kern_channel_increment_ring_net_stats(tx_ring
, pcb
->ipsec_ifp
, &tx_ring_stats
);
989 (void)kern_channel_reclaim(tx_ring
);
992 /* always reenable output */
993 errno_t error
= ifnet_enable_output(pcb
->ipsec_ifp
);
995 os_log_error(OS_LOG_DEFAULT
, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error
);
998 // Unlock first, then exit ring
999 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
1001 if (tx_pslot
!= NULL
) {
1002 kern_channel_notify(tx_ring
, 0);
1006 ipsec_data_move_end(pcb
);
1011 ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class
)
1013 switch (svc_class
) {
1034 ipsec_netif_ring_init(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
1035 kern_channel_t channel
, kern_channel_ring_t ring
, boolean_t is_tx_ring
,
1038 #pragma unused(nxprov)
1039 #pragma unused(channel)
1040 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
1043 VERIFY(pcb
->ipsec_netif_rxring
[0] == NULL
);
1044 pcb
->ipsec_netif_rxring
[0] = ring
;
1046 uint8_t ring_idx
= 0;
1047 if (ipsec_in_wmm_mode(pcb
)) {
1049 kern_packet_svc_class_t svc_class
;
1050 err
= kern_channel_get_service_class(ring
, &svc_class
);
1052 ring_idx
= ipsec_find_tx_ring_by_svc(svc_class
);
1053 VERIFY(ring_idx
< IPSEC_IF_WMM_RING_COUNT
);
1056 *ring_ctx
= (void *)(uintptr_t)ring_idx
;
1058 VERIFY(pcb
->ipsec_netif_txring
[ring_idx
] == NULL
);
1059 pcb
->ipsec_netif_txring
[ring_idx
] = ring
;
1065 ipsec_netif_ring_fini(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
1066 kern_channel_ring_t ring
)
1068 #pragma unused(nxprov)
1069 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
1072 for (int i
= 0; i
< IPSEC_NETIF_MAX_RX_RING_COUNT
; i
++) {
1073 if (pcb
->ipsec_netif_rxring
[i
] == ring
) {
1074 pcb
->ipsec_netif_rxring
[i
] = NULL
;
1079 for (int i
= 0; i
< IPSEC_NETIF_MAX_TX_RING_COUNT
; i
++) {
1080 if (pcb
->ipsec_netif_txring
[i
] == ring
) {
1081 pcb
->ipsec_netif_txring
[i
] = NULL
;
1090 ipsec_netif_check_policy(ifnet_t interface
, mbuf_t data
)
1092 necp_kernel_policy_result necp_result
= 0;
1093 necp_kernel_policy_result_parameter necp_result_parameter
= {};
1094 uint32_t necp_matched_policy_id
= 0;
1095 struct ip_out_args args4
= { };
1096 struct ip6_out_args args6
= { };
1098 // This packet has been marked with IP level policy, do not mark again.
1099 if (data
&& data
->m_pkthdr
.necp_mtag
.necp_policy_id
>= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP
) {
1103 size_t length
= mbuf_pkthdr_len(data
);
1104 if (length
< sizeof(struct ip
)) {
1108 struct ip
*ip
= mtod(data
, struct ip
*);
1109 u_int ip_version
= ip
->ip_v
;
1110 switch (ip_version
) {
1112 if (interface
!= NULL
) {
1113 args4
.ipoa_flags
|= IPOAF_BOUND_IF
;
1114 args4
.ipoa_boundif
= interface
->if_index
;
1116 necp_matched_policy_id
= necp_ip_output_find_policy_match(data
, IP_OUTARGS
, &args4
, NULL
,
1117 &necp_result
, &necp_result_parameter
);
1121 if (interface
!= NULL
) {
1122 args6
.ip6oa_flags
|= IP6OAF_BOUND_IF
;
1123 args6
.ip6oa_boundif
= interface
->if_index
;
1125 necp_matched_policy_id
= necp_ip6_output_find_policy_match(data
, IPV6_OUTARGS
, &args6
, NULL
,
1126 &necp_result
, &necp_result_parameter
);
1134 if (necp_result
== NECP_KERNEL_POLICY_RESULT_DROP
||
1135 necp_result
== NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT
) {
1136 /* Drop and flow divert packets should be blocked at the IP layer */
1140 necp_mark_packet_from_ip(data
, necp_matched_policy_id
);
/*
 * ipsec_netif_sync_tx
 * Netif nexus TX sync callback. Drains the netif TX ring: when a kpipe is
 * allocated, it only kicks the corresponding kpipe RX ring; otherwise each
 * slot is copied into an mbuf, checked against NECP policy, and sent through
 * ipsec_output() into the BSD stack.
 * NOTE(review): several error-path and brace lines are missing from this
 * extract; comments cover only the visible statements.
 */
1145 ipsec_netif_sync_tx(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
1146 kern_channel_ring_t tx_ring
, uint32_t flags
)
1148 #pragma unused(nxprov)
1149 #pragma unused(flags)
1150 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
1152 struct netif_stats
*nifs
= &NX_NETIF_PRIVATE(nexus
)->nif_stats
;
/* Refuse to run when the data path has been stopped for this pcb. */
1154 if (!ipsec_data_move_begin(pcb
)) {
1155 os_log_error(OS_LOG_DEFAULT
, "%s: data path stopped for %s\n", __func__
, if_name(pcb
->ipsec_ifp
));
1159 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
1161 struct kern_channel_ring_stat_increment tx_ring_stats
;
1162 bzero(&tx_ring_stats
, sizeof(tx_ring_stats
));
1163 kern_channel_slot_t tx_pslot
= NULL
;
1164 kern_channel_slot_t tx_slot
= kern_channel_get_next_slot(tx_ring
, NULL
, NULL
);
1166 STATS_INC(nifs
, NETIF_STATS_TX_SYNC
);
1168 if (tx_slot
== NULL
) {
1169 // Nothing to write, don't bother signalling
1170 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
1171 ipsec_data_move_end(pcb
);
/* Kernel-pipe mode: don't consume here, just wake the matching kpipe RX ring. */
1175 if (pcb
->ipsec_kpipe_count
&&
1176 ipsec_flag_isset(pcb
, IPSEC_FLAGS_KPIPE_ALLOCATED
)) {
1177 // Select the corresponding kpipe rx ring
1178 uint8_t ring_idx
= (uint8_t)(uintptr_t)kern_channel_ring_get_context(tx_ring
);
1179 VERIFY(ring_idx
< IPSEC_IF_MAX_RING_COUNT
);
1180 kern_channel_ring_t rx_ring
= pcb
->ipsec_kpipe_rxring
[ring_idx
];
1182 // Unlock while calling notify
1183 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
1185 // Signal the kernel pipe ring to read
1186 if (rx_ring
!= NULL
) {
1187 kern_channel_notify(rx_ring
, 0);
1190 ipsec_data_move_end(pcb
);
1194 // If we're here, we're injecting into the BSD stack
1195 while (tx_slot
!= NULL
) {
1199 kern_packet_t tx_ph
= kern_channel_slot_get_packet(tx_ring
, tx_slot
);
/* Advance before processing so the slot is consumed even on error paths. */
1203 tx_slot
= kern_channel_get_next_slot(tx_ring
, tx_slot
, NULL
);
1209 kern_buflet_t tx_buf
= kern_packet_get_next_buflet(tx_ph
, NULL
);
1210 VERIFY(tx_buf
!= NULL
);
1211 uint8_t *tx_baddr
= kern_buflet_get_data_address(tx_buf
);
1212 VERIFY(tx_baddr
!= 0);
1213 tx_baddr
+= kern_buflet_get_data_offset(tx_buf
);
1215 bpf_tap_packet_out(pcb
->ipsec_ifp
, DLT_RAW
, tx_ph
, NULL
, 0);
/* Clamp the copy to the pcb slot size. */
1217 length
= MIN(kern_packet_get_data_length(tx_ph
),
1218 pcb
->ipsec_slot_size
);
/* Copy slot contents into a freshly allocated mbuf for the BSD stack. */
1221 errno_t error
= mbuf_gethdr(MBUF_DONTWAIT
, MBUF_TYPE_HEADER
, &data
);
1223 error
= mbuf_copyback(data
, 0, length
, tx_baddr
, MBUF_DONTWAIT
);
1225 // Mark packet from policy
1226 uint32_t policy_id
= kern_packet_get_policy_id(tx_ph
);
1227 necp_mark_packet_from_ip(data
, policy_id
);
1229 // Check policy with NECP
1230 if (!ipsec_netif_check_policy(pcb
->ipsec_ifp
, data
)) {
1231 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_tx %s - failed policy check\n", pcb
->ipsec_ifp
->if_xname
);
1232 STATS_INC(nifs
, NETIF_STATS_DROP
);
1236 // Send through encryption
1237 error
= ipsec_output(pcb
->ipsec_ifp
, data
);
1239 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb
->ipsec_ifp
->if_xname
, error
);
/* Error paths below: copyback failure, gethdr failure, zero-length slot. */
1243 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb
->ipsec_ifp
->if_xname
, length
, error
);
1244 STATS_INC(nifs
, NETIF_STATS_DROP_NOMEM_MBUF
);
1245 STATS_INC(nifs
, NETIF_STATS_DROP
);
1250 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb
->ipsec_ifp
->if_xname
, error
);
1251 STATS_INC(nifs
, NETIF_STATS_DROP_NOMEM_MBUF
);
1252 STATS_INC(nifs
, NETIF_STATS_DROP
);
1255 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_tx %s - 0 length packet\n", pcb
->ipsec_ifp
->if_xname
);
1256 STATS_INC(nifs
, NETIF_STATS_DROP_BADLEN
);
1257 STATS_INC(nifs
, NETIF_STATS_DROP
);
1261 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb
->ipsec_ifp
->if_xname
);
1265 STATS_INC(nifs
, NETIF_STATS_TX_PACKETS
);
1266 STATS_INC(nifs
, NETIF_STATS_TX_COPY_MBUF
);
1268 tx_ring_stats
.kcrsi_slots_transferred
++;
1269 tx_ring_stats
.kcrsi_bytes_transferred
+= length
;
/* Commit consumed slots and account ring stats before unlocking. */
1273 kern_channel_advance_slot(tx_ring
, tx_pslot
);
1274 kern_channel_increment_ring_net_stats(tx_ring
, pcb
->ipsec_ifp
, &tx_ring_stats
);
1275 (void)kern_channel_reclaim(tx_ring
);
1278 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
1279 ipsec_data_move_end(pcb
);
/*
 * ipsec_netif_tx_doorbell_one
 * Services a TX doorbell for a single netif TX ring: refills/syncs the ring,
 * applies back-pressure (disables ifnet output when the ring is full) in
 * kpipe mode, and kicks the matching kpipe RX ring.
 * NOTE(review): some lines (returns, closing braces, comment tails) are
 * missing from this extract; comments cover only the visible statements.
 */
1285 ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
1286 kern_channel_ring_t ring
, uint32_t flags
, uint8_t ring_idx
)
1288 #pragma unused(nxprov)
1289 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
1290 boolean_t more
= false;
/* Async refill is not expected on this path. */
1293 VERIFY((flags
& KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL
) == 0);
1296 * Refill and sync the ring; we may be racing against another thread doing
1297 * an RX sync that also wants to do kr_enter(), and so use the blocking
1300 rc
= kern_channel_tx_refill_canblock(ring
, UINT32_MAX
, UINT32_MAX
, true, &more
);
/* EAGAIN/EBUSY are benign contention results; only log real failures. */
1301 if (rc
!= 0 && rc
!= EAGAIN
&& rc
!= EBUSY
) {
1302 os_log_error(OS_LOG_DEFAULT
, "%s: %s ring %s tx refill failed %d\n", __func__
,
1303 pcb
->ipsec_if_xname
, ring
->ckr_name
, rc
);
/* Re-validate the ring under the pcb lock after entering it. */
1306 (void) kr_enter(ring
, TRUE
);
1307 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
1308 if (ring
!= pcb
->ipsec_netif_txring
[ring_idx
]) {
1309 // ring no longer valid
1310 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
1312 os_log_error(OS_LOG_DEFAULT
, "%s: %s ring %s index %d bad netif_txring 3\n", __func__
,
1313 pcb
->ipsec_if_xname
, ring
->ckr_name
, ring_idx
);
/* Back-pressure: if the TX ring is (nearly) full, stall ifnet output. */
1317 if (pcb
->ipsec_kpipe_count
) {
1318 uint32_t tx_available
= kern_channel_available_slot_count(ring
);
1319 if (pcb
->ipsec_netif_txring_size
> 0 &&
1320 tx_available
>= pcb
->ipsec_netif_txring_size
- 1) {
1321 // No room left in tx ring, disable output for now
1322 errno_t error
= ifnet_disable_output(pcb
->ipsec_ifp
);
1324 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error
);
/* In kpipe mode, wake the paired kpipe RX ring so it drains this TX ring. */
1329 if (pcb
->ipsec_kpipe_count
) {
1330 kern_channel_ring_t rx_ring
= pcb
->ipsec_kpipe_rxring
[ring_idx
];
1332 // Unlock while calling notify
1333 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
1334 // Signal the kernel pipe ring to read
1335 if (rx_ring
!= NULL
) {
1336 kern_channel_notify(rx_ring
, 0);
1339 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
/*
 * ipsec_netif_tx_doorbell
 * Top-level TX doorbell callback. In WMM mode it services every netif TX
 * ring; otherwise only the ring that was signalled (index 0).
 * NOTE(review): lines are missing from this extract (returns, braces);
 * comments cover only the visible statements.
 */
1348 ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
1349 kern_channel_ring_t ring
, __unused
uint32_t flags
)
1352 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
/* Bail out if the data path is stopped for this pcb. */
1354 if (!ipsec_data_move_begin(pcb
)) {
1355 os_log_error(OS_LOG_DEFAULT
, "%s: data path stopped for %s\n", __func__
, if_name(pcb
->ipsec_ifp
));
/* WMM (driver-managed scheduling): service all per-class TX rings. */
1359 if (ipsec_in_wmm_mode(pcb
)) {
1360 for (uint8_t i
= 0; i
< IPSEC_IF_WMM_RING_COUNT
; i
++) {
1361 kern_channel_ring_t nring
= pcb
->ipsec_netif_txring
[i
];
1362 ret
= ipsec_netif_tx_doorbell_one(nxprov
, nexus
, nring
, flags
, i
);
/* Non-WMM: only the signalled ring, always index 0. */
1368 ret
= ipsec_netif_tx_doorbell_one(nxprov
, nexus
, ring
, flags
, 0);
1371 ipsec_data_move_end(pcb
);
/*
 * ipsec_netif_sync_rx
 * Netif nexus RX sync callback. Two phases are visible:
 *  1) Drain the pcb's pending input mbuf chain (already-decrypted "legacy"
 *     packets) into netif RX slots, fragmenting v4/v6 packets that exceed
 *     the buflet size or the configured input fragment size.
 *  2) Drain each kpipe TX ring: copy each slot into an mbuf, decrypt it in
 *     place via esp4/esp6 input under the kpipe decrypt lock, then attach
 *     the decrypted packet to an RX slot.
 * NOTE(review): this extract is missing many lines (case labels, braces,
 * error-check conditions); comments describe only the visible statements.
 */
1376 ipsec_netif_sync_rx(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
1377 kern_channel_ring_t rx_ring
, uint32_t flags
)
1379 #pragma unused(nxprov)
1380 #pragma unused(flags)
1381 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
1382 struct kern_channel_ring_stat_increment rx_ring_stats
;
1384 struct netif_stats
*nifs
= &NX_NETIF_PRIVATE(nexus
)->nif_stats
;
/* Refuse to run when the data path has been stopped for this pcb. */
1386 if (!ipsec_data_move_begin(pcb
)) {
1387 os_log_error(OS_LOG_DEFAULT
, "%s: data path stopped for %s\n", __func__
, if_name(pcb
->ipsec_ifp
));
1391 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
1393 // Reclaim user-released slots
1394 (void) kern_channel_reclaim(rx_ring
);
1396 STATS_INC(nifs
, NETIF_STATS_RX_SYNC
);
1398 uint32_t avail
= kern_channel_available_slot_count(rx_ring
);
1400 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
1401 ipsec_data_move_end(pcb
);
1405 struct kern_pbufpool
*rx_pp
= rx_ring
->ckr_pp
;
1406 VERIFY(rx_pp
!= NULL
);
1407 bzero(&rx_ring_stats
, sizeof(rx_ring_stats
));
1408 kern_channel_slot_t rx_pslot
= NULL
;
1409 kern_channel_slot_t rx_slot
= kern_channel_get_next_slot(rx_ring
, NULL
, NULL
);
/* Phase 1: move pending input-chain mbufs into RX slots. */
1411 while (rx_slot
!= NULL
) {
1412 // Check for a waiting packet
1413 lck_mtx_lock(&pcb
->ipsec_input_chain_lock
);
1414 mbuf_t data
= pcb
->ipsec_input_chain
;
1416 lck_mtx_unlock(&pcb
->ipsec_input_chain_lock
);
1420 // Allocate rx packet
1421 kern_packet_t rx_ph
= 0;
1422 errno_t error
= kern_pbufpool_alloc_nosleep(rx_pp
, 1, &rx_ph
);
1423 if (__improbable(error
!= 0)) {
1424 STATS_INC(nifs
, NETIF_STATS_DROP_NOMEM_PKT
);
1425 STATS_INC(nifs
, NETIF_STATS_DROP
);
1426 lck_mtx_unlock(&pcb
->ipsec_input_chain_lock
);
1430 // Advance waiting packets
1431 if (pcb
->ipsec_input_chain_count
> 0) {
1432 pcb
->ipsec_input_chain_count
--;
/* Unlink the head of the input chain; maintain the tail pointer. */
1434 pcb
->ipsec_input_chain
= data
->m_nextpkt
;
1435 data
->m_nextpkt
= NULL
;
1436 if (pcb
->ipsec_input_chain
== NULL
) {
1437 pcb
->ipsec_input_chain_last
= NULL
;
1439 lck_mtx_unlock(&pcb
->ipsec_input_chain_lock
);
1441 size_t length
= mbuf_pkthdr_len(data
);
/* Packet must at least hold an IPv4 header before the version is read. */
1443 if (length
< sizeof(struct ip
)) {
1446 kern_pbufpool_free(rx_pp
, rx_ph
);
1447 STATS_INC(nifs
, NETIF_STATS_DROP_BADLEN
);
1448 STATS_INC(nifs
, NETIF_STATS_DROP
);
1449 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
1450 pcb
->ipsec_ifp
->if_xname
, length
, sizeof(struct ip
));
1455 struct ip
*ip
= mtod(data
, struct ip
*);
1456 u_int ip_version
= ip
->ip_v
;
1457 switch (ip_version
) {
1467 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
1468 pcb
->ipsec_ifp
->if_xname
, ip_version
);
/* Packet too large for a buflet (or configured frag size): fragment it. */
1473 if (length
> rx_pp
->pp_buflet_size
||
1474 (pcb
->ipsec_frag_size_set
&& length
> pcb
->ipsec_input_frag_size
)) {
1475 // We need to fragment to send up into the netif
1477 u_int32_t fragment_mtu
= rx_pp
->pp_buflet_size
;
1478 if (pcb
->ipsec_frag_size_set
&&
1479 pcb
->ipsec_input_frag_size
< rx_pp
->pp_buflet_size
) {
1480 fragment_mtu
= pcb
->ipsec_input_frag_size
;
1483 mbuf_t fragment_chain
= NULL
;
1486 // ip_fragment expects the length in host order
1487 ip
->ip_len
= ntohs(ip
->ip_len
);
1489 // ip_fragment will modify the original data, don't free
1490 int fragment_error
= ip_fragment(data
, pcb
->ipsec_ifp
, fragment_mtu
, TRUE
);
1491 if (fragment_error
== 0 && data
!= NULL
) {
1492 fragment_chain
= data
;
1494 STATS_INC(nifs
, NETIF_STATS_DROP_BADLEN
);
1495 STATS_INC(nifs
, NETIF_STATS_DROP
);
1496 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
1497 pcb
->ipsec_ifp
->if_xname
, length
, fragment_error
);
/* IPv6 fragmentation path. */
1502 if (length
< sizeof(struct ip6_hdr
)) {
1504 STATS_INC(nifs
, NETIF_STATS_DROP_BADLEN
);
1505 STATS_INC(nifs
, NETIF_STATS_DROP
);
1506 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
1507 pcb
->ipsec_ifp
->if_xname
, length
, sizeof(struct ip6_hdr
));
1509 // ip6_do_fragmentation will free the original data on success only
1510 struct ip6_hdr
*ip6
= mtod(data
, struct ip6_hdr
*);
1512 int fragment_error
= ip6_do_fragmentation(&data
, 0, pcb
->ipsec_ifp
, sizeof(struct ip6_hdr
),
1513 ip6
, NULL
, fragment_mtu
, ip6
->ip6_nxt
, htonl(ip6_randomid()));
1514 if (fragment_error
== 0 && data
!= NULL
) {
1515 fragment_chain
= data
;
1518 STATS_INC(nifs
, NETIF_STATS_DROP_BADLEN
);
1519 STATS_INC(nifs
, NETIF_STATS_DROP
);
1520 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
1521 pcb
->ipsec_ifp
->if_xname
, length
, fragment_error
);
1527 // Cannot fragment unknown families
1529 STATS_INC(nifs
, NETIF_STATS_DROP_BADLEN
);
1530 STATS_INC(nifs
, NETIF_STATS_DROP
);
1531 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
1532 pcb
->ipsec_ifp
->if_xname
, length
, rx_pp
->pp_buflet_size
);
/* Re-queue the produced fragments at the head of the input chain. */
1537 if (fragment_chain
!= NULL
) {
1538 // Add fragments to chain before continuing
1539 lck_mtx_lock(&pcb
->ipsec_input_chain_lock
);
1540 if (pcb
->ipsec_input_chain
!= NULL
) {
1541 pcb
->ipsec_input_chain_last
->m_nextpkt
= fragment_chain
;
1543 pcb
->ipsec_input_chain
= fragment_chain
;
1545 pcb
->ipsec_input_chain_count
++;
1546 while (fragment_chain
->m_nextpkt
) {
1547 VERIFY(fragment_chain
!= fragment_chain
->m_nextpkt
);
1548 fragment_chain
= fragment_chain
->m_nextpkt
;
1549 pcb
->ipsec_input_chain_count
++;
1551 pcb
->ipsec_input_chain_last
= fragment_chain
;
1552 lck_mtx_unlock(&pcb
->ipsec_input_chain_lock
);
1555 // Make sure to free unused rx packet
1556 kern_pbufpool_free(rx_pp
, rx_ph
);
1561 mbuf_pkthdr_setrcvif(data
, pcb
->ipsec_ifp
);
1563 // Fillout rx packet
1564 kern_buflet_t rx_buf
= kern_packet_get_next_buflet(rx_ph
, NULL
);
1565 VERIFY(rx_buf
!= NULL
);
1566 void *rx_baddr
= kern_buflet_get_data_address(rx_buf
);
1567 VERIFY(rx_baddr
!= NULL
);
1569 // Copy-in data from mbuf to buflet
1570 mbuf_copydata(data
, 0, length
, (void *)rx_baddr
);
1571 kern_packet_clear_flow_uuid(rx_ph
); // Zero flow id
1573 // Finalize and attach the packet
1574 error
= kern_buflet_set_data_offset(rx_buf
, 0);
1576 error
= kern_buflet_set_data_length(rx_buf
, length
);
1578 error
= kern_packet_set_headroom(rx_ph
, 0);
1580 error
= kern_packet_finalize(rx_ph
);
1582 error
= kern_channel_slot_attach_packet(rx_ring
, rx_slot
, rx_ph
);
1585 STATS_INC(nifs
, NETIF_STATS_RX_PACKETS
);
1586 STATS_INC(nifs
, NETIF_STATS_RX_COPY_MBUF
);
1587 bpf_tap_packet_in(pcb
->ipsec_ifp
, DLT_RAW
, rx_ph
, NULL
, 0);
1589 rx_ring_stats
.kcrsi_slots_transferred
++;
1590 rx_ring_stats
.kcrsi_bytes_transferred
+= length
;
1592 if (!pcb
->ipsec_ext_ifdata_stats
) {
1593 ifnet_stat_increment_in(pcb
->ipsec_ifp
, 1, length
, 0);
1600 rx_slot
= kern_channel_get_next_slot(rx_ring
, rx_slot
, NULL
);
/* Phase 2: drain each kpipe TX ring, decrypting packets into RX slots. */
1603 for (uint8_t ring_idx
= 0; ring_idx
< pcb
->ipsec_kpipe_count
; ring_idx
++) {
1604 struct kern_channel_ring_stat_increment tx_ring_stats
;
1605 bzero(&tx_ring_stats
, sizeof(tx_ring_stats
));
1606 kern_channel_ring_t tx_ring
= pcb
->ipsec_kpipe_txring
[ring_idx
];
1607 kern_channel_slot_t tx_pslot
= NULL
;
1608 kern_channel_slot_t tx_slot
= NULL
;
1609 if (tx_ring
== NULL
) {
1610 // Net-If TX ring not set up yet, nothing to read
1615 // Unlock ipsec before entering ring
1616 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
1618 (void)kr_enter(tx_ring
, TRUE
);
1620 // Lock again after entering and validate
1621 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
/* The ring may have been torn down while the pcb lock was dropped. */
1623 if (tx_ring
!= pcb
->ipsec_kpipe_txring
[ring_idx
]) {
1627 tx_slot
= kern_channel_get_next_slot(tx_ring
, NULL
, NULL
);
1628 if (tx_slot
== NULL
) {
1629 // Nothing to read, don't bother signalling
1633 while (rx_slot
!= NULL
&& tx_slot
!= NULL
) {
1639 // Allocate rx packet
1640 kern_packet_t rx_ph
= 0;
1641 error
= kern_pbufpool_alloc_nosleep(rx_pp
, 1, &rx_ph
);
1642 if (__improbable(error
!= 0)) {
1643 STATS_INC(nifs
, NETIF_STATS_DROP_NOMEM_PKT
);
1644 STATS_INC(nifs
, NETIF_STATS_DROP
);
1648 kern_packet_t tx_ph
= kern_channel_slot_get_packet(tx_ring
, tx_slot
);
/* Advance before processing so the slot is consumed even on error paths. */
1652 tx_slot
= kern_channel_get_next_slot(tx_ring
, tx_slot
, NULL
);
1655 kern_pbufpool_free(rx_pp
, rx_ph
);
1659 kern_buflet_t tx_buf
= kern_packet_get_next_buflet(tx_ph
, NULL
);
1660 VERIFY(tx_buf
!= NULL
);
1661 uint8_t *tx_baddr
= kern_buflet_get_data_address(tx_buf
);
1662 VERIFY(tx_baddr
!= 0);
1663 tx_baddr
+= kern_buflet_get_data_offset(tx_buf
);
1665 length
= MIN(kern_packet_get_data_length(tx_ph
),
1666 pcb
->ipsec_slot_size
);
1668 // Increment TX stats
1669 tx_ring_stats
.kcrsi_slots_transferred
++;
1670 tx_ring_stats
.kcrsi_bytes_transferred
+= length
;
1672 if (length
>= sizeof(struct ip
)) {
1673 error
= mbuf_gethdr(MBUF_DONTWAIT
, MBUF_TYPE_HEADER
, &data
);
1675 error
= mbuf_copyback(data
, 0, length
, tx_baddr
, MBUF_DONTWAIT
);
/* Decrypt serialized under the kpipe decrypt lock. */
1677 lck_mtx_lock(&pcb
->ipsec_kpipe_decrypt_lock
);
1678 struct ip
*ip
= mtod(data
, struct ip
*);
1679 u_int ip_version
= ip
->ip_v
;
1680 switch (ip_version
) {
/* IPv4: convert ip_len/ip_off to host order as esp4_input expects. */
1683 ip
->ip_len
= ntohs(ip
->ip_len
) - sizeof(struct ip
);
1684 ip
->ip_off
= ntohs(ip
->ip_off
);
1686 if (length
< ip
->ip_len
) {
1687 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
1688 pcb
->ipsec_ifp
->if_xname
, length
, ip
->ip_len
);
1689 STATS_INC(nifs
, NETIF_STATS_DROP_BADLEN
);
1690 STATS_INC(nifs
, NETIF_STATS_DROP
);
1694 data
= esp4_input_extended(data
, sizeof(struct ip
), pcb
->ipsec_ifp
);
/* IPv6 decrypt path. */
1699 if (length
< sizeof(struct ip6_hdr
)) {
1700 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
1701 pcb
->ipsec_ifp
->if_xname
, length
);
1702 STATS_INC(nifs
, NETIF_STATS_DROP_BADLEN
);
1703 STATS_INC(nifs
, NETIF_STATS_DROP
);
1708 struct ip6_hdr
*ip6
= mtod(data
, struct ip6_hdr
*);
1709 const size_t ip6_len
= sizeof(*ip6
) + ntohs(ip6
->ip6_plen
);
1710 if (length
< ip6_len
) {
1711 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
1712 pcb
->ipsec_ifp
->if_xname
, length
, ip6_len
);
1713 STATS_INC(nifs
, NETIF_STATS_DROP_BADLEN
);
1714 STATS_INC(nifs
, NETIF_STATS_DROP
);
1718 int offset
= sizeof(struct ip6_hdr
);
1719 esp6_input_extended(&data
, &offset
, ip6
->ip6_nxt
, pcb
->ipsec_ifp
);
1725 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_rx %s: unknown ip version %u\n",
1726 pcb
->ipsec_ifp
->if_xname
, ip_version
);
1727 STATS_INC(nifs
, NETIF_STATS_DROP
);
1733 lck_mtx_unlock(&pcb
->ipsec_kpipe_decrypt_lock
);
1735 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb
->ipsec_ifp
->if_xname
, length
, error
);
1736 STATS_INC(nifs
, NETIF_STATS_DROP_NOMEM_MBUF
);
1737 STATS_INC(nifs
, NETIF_STATS_DROP
);
1742 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb
->ipsec_ifp
->if_xname
, error
);
1743 STATS_INC(nifs
, NETIF_STATS_DROP_NOMEM_MBUF
);
1744 STATS_INC(nifs
, NETIF_STATS_DROP
);
1747 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb
->ipsec_ifp
->if_xname
, length
);
1748 STATS_INC(nifs
, NETIF_STATS_DROP_BADLEN
);
1749 STATS_INC(nifs
, NETIF_STATS_DROP
);
1753 // Failed to get decrypted data data
1754 kern_pbufpool_free(rx_pp
, rx_ph
);
/* Decrypted packet must fit one buflet. */
1758 length
= mbuf_pkthdr_len(data
);
1759 if (length
> rx_pp
->pp_buflet_size
) {
1762 kern_pbufpool_free(rx_pp
, rx_ph
);
1763 STATS_INC(nifs
, NETIF_STATS_DROP_BADLEN
);
1764 STATS_INC(nifs
, NETIF_STATS_DROP
);
1765 os_log_error(OS_LOG_DEFAULT
, "ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
1766 pcb
->ipsec_ifp
->if_xname
, length
, rx_pp
->pp_buflet_size
);
1770 mbuf_pkthdr_setrcvif(data
, pcb
->ipsec_ifp
);
1772 // Fillout rx packet
1773 kern_buflet_t rx_buf
= kern_packet_get_next_buflet(rx_ph
, NULL
);
1774 VERIFY(rx_buf
!= NULL
);
1775 void *rx_baddr
= kern_buflet_get_data_address(rx_buf
);
1776 VERIFY(rx_baddr
!= NULL
);
1778 // Copy-in data from mbuf to buflet
1779 mbuf_copydata(data
, 0, length
, (void *)rx_baddr
);
1780 kern_packet_clear_flow_uuid(rx_ph
); // Zero flow id
1782 // Finalize and attach the packet
1783 error
= kern_buflet_set_data_offset(rx_buf
, 0);
1785 error
= kern_buflet_set_data_length(rx_buf
, length
);
1787 error
= kern_packet_set_link_header_offset(rx_ph
, 0);
1789 error
= kern_packet_set_network_header_offset(rx_ph
, 0);
1791 error
= kern_packet_finalize(rx_ph
);
1793 error
= kern_channel_slot_attach_packet(rx_ring
, rx_slot
, rx_ph
);
1796 STATS_INC(nifs
, NETIF_STATS_RX_PACKETS
);
1797 STATS_INC(nifs
, NETIF_STATS_RX_COPY_DIRECT
);
1798 bpf_tap_packet_in(pcb
->ipsec_ifp
, DLT_RAW
, rx_ph
, NULL
, 0);
1800 rx_ring_stats
.kcrsi_slots_transferred
++;
1801 rx_ring_stats
.kcrsi_bytes_transferred
+= length
;
1803 if (!pcb
->ipsec_ext_ifdata_stats
) {
1804 ifnet_stat_increment_in(pcb
->ipsec_ifp
, 1, length
, 0);
1810 rx_slot
= kern_channel_get_next_slot(rx_ring
, rx_slot
, NULL
);
/* Commit consumed kpipe TX slots and account their stats. */
1815 kern_channel_advance_slot(tx_ring
, tx_pslot
);
1816 kern_channel_increment_ring_net_stats(tx_ring
, pcb
->ipsec_ifp
, &tx_ring_stats
);
1817 (void)kern_channel_reclaim(tx_ring
);
1820 // Unlock first, then exit ring
1821 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
1822 if (tx_ring
!= NULL
) {
1823 if (tx_pslot
!= NULL
) {
1824 kern_channel_notify(tx_ring
, 0);
1829 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
/* Commit produced RX slots and account ring stats before unlocking. */
1833 kern_channel_advance_slot(rx_ring
, rx_pslot
);
1834 kern_channel_increment_ring_net_stats(rx_ring
, pcb
->ipsec_ifp
, &rx_ring_stats
);
1838 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
1840 ipsec_data_move_end(pcb
);
/*
 * ipsec_nexus_ifattach
 * Creates and registers the netif nexus provider for an ipsec interface:
 * sets up provider callbacks, nexus attributes (slot/ring sizes, optional
 * WMM multi-ring configuration), a dedicated packet buffer pool, and then
 * allocates the net provider instance. On failure, already-created
 * resources visible here are torn down.
 * NOTE(review): several lines (returns, error checks, some arguments) are
 * missing from this extract; comments cover only the visible statements.
 */
1845 ipsec_nexus_ifattach(struct ipsec_pcb
*pcb
,
1846 struct ifnet_init_eparams
*init_params
,
1850 nexus_controller_t controller
= kern_nexus_shared_controller();
1851 struct kern_nexus_net_init net_init
;
1852 struct kern_pbufpool_init pp_init
;
/* Provider name is derived from the interface name. */
1854 nexus_name_t provider_name
;
1855 snprintf((char *)provider_name
, sizeof(provider_name
),
1856 "com.apple.netif.%s", pcb
->ipsec_if_xname
);
/* Netif provider callbacks (ring lifecycle, sync, doorbell). */
1858 struct kern_nexus_provider_init prov_init
= {
1859 .nxpi_version
= KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION
,
1860 .nxpi_flags
= NXPIF_VIRTUAL_DEVICE
,
1861 .nxpi_pre_connect
= ipsec_nexus_pre_connect
,
1862 .nxpi_connected
= ipsec_nexus_connected
,
1863 .nxpi_pre_disconnect
= ipsec_netif_pre_disconnect
,
1864 .nxpi_disconnected
= ipsec_nexus_disconnected
,
1865 .nxpi_ring_init
= ipsec_netif_ring_init
,
1866 .nxpi_ring_fini
= ipsec_netif_ring_fini
,
1867 .nxpi_slot_init
= NULL
,
1868 .nxpi_slot_fini
= NULL
,
1869 .nxpi_sync_tx
= ipsec_netif_sync_tx
,
1870 .nxpi_sync_rx
= ipsec_netif_sync_rx
,
1871 .nxpi_tx_doorbell
= ipsec_netif_tx_doorbell
,
1874 nexus_attr_t nxa
= NULL
;
1875 err
= kern_nexus_attr_create(&nxa
);
1876 IPSEC_IF_VERIFY(err
== 0);
1878 os_log_error(OS_LOG_DEFAULT
, "%s: kern_nexus_attr_create failed: %d\n",
1883 uint64_t slot_buffer_size
= pcb
->ipsec_slot_size
;
1884 err
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_SLOT_BUF_SIZE
, slot_buffer_size
);
1887 // Reset ring size for netif nexus to limit memory usage
1888 uint64_t ring_size
= pcb
->ipsec_netif_ring_size
;
1889 err
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_TX_SLOTS
, ring_size
);
1891 err
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_RX_SLOTS
, ring_size
);
/* WMM mode: driver-managed scheduling and multiple TX/RX rings. */
1896 if (ipsec_in_wmm_mode(pcb
)) {
1897 os_log(OS_LOG_DEFAULT
, "%s: %s enabling wmm mode\n",
1898 __func__
, pcb
->ipsec_if_xname
);
1900 init_params
->output_sched_model
= IFNET_SCHED_MODEL_DRIVER_MANAGED
;
1902 err
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_TX_RINGS
,
1903 IPSEC_NETIF_WMM_TX_RING_COUNT
);
1905 err
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_RX_RINGS
,
1906 IPSEC_NETIF_WMM_RX_RING_COUNT
);
1909 err
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_QMAP
, NEXUS_QMAP_TYPE_WMM
);
1913 pcb
->ipsec_netif_txring_size
= ring_size
;
/* Dedicated packet buffer pool shared by netif TX and RX. */
1915 bzero(&pp_init
, sizeof(pp_init
));
1916 pp_init
.kbi_version
= KERN_PBUFPOOL_CURRENT_VERSION
;
1917 pp_init
.kbi_flags
|= KBIF_VIRTUAL_DEVICE
;
1918 // Note: we need more packets than can be held in the tx and rx rings because
1919 // packets can also be in the AQM queue(s)
1920 pp_init
.kbi_packets
= pcb
->ipsec_netif_ring_size
* (2 * pcb
->ipsec_kpipe_count
+ 1);
1921 pp_init
.kbi_bufsize
= pcb
->ipsec_slot_size
;
1922 pp_init
.kbi_buf_seg_size
= IPSEC_IF_DEFAULT_BUF_SEG_SIZE
;
1923 pp_init
.kbi_max_frags
= 1;
1924 (void) snprintf((char *)pp_init
.kbi_name
, sizeof(pp_init
.kbi_name
),
1925 "%s", provider_name
);
1926 pp_init
.kbi_ctx
= NULL
;
1927 pp_init
.kbi_ctx_retain
= NULL
;
1928 pp_init
.kbi_ctx_release
= NULL
;
1930 err
= kern_pbufpool_create(&pp_init
, &pcb
->ipsec_netif_pp
, NULL
);
1932 os_log_error(OS_LOG_DEFAULT
, "%s pbufbool create failed, error %d\n", __func__
, err
);
/* Register the provider, then allocate the net provider instance. */
1936 err
= kern_nexus_controller_register_provider(controller
,
1942 &pcb
->ipsec_nx
.if_provider
);
1943 IPSEC_IF_VERIFY(err
== 0);
1945 os_log_error(OS_LOG_DEFAULT
, "%s register provider failed, error %d\n",
1950 bzero(&net_init
, sizeof(net_init
));
1951 net_init
.nxneti_version
= KERN_NEXUS_NET_CURRENT_VERSION
;
1952 net_init
.nxneti_flags
= 0;
1953 net_init
.nxneti_eparams
= init_params
;
1954 net_init
.nxneti_lladdr
= NULL
;
1955 net_init
.nxneti_prepare
= ipsec_netif_prepare
;
1956 net_init
.nxneti_rx_pbufpool
= pcb
->ipsec_netif_pp
;
1957 net_init
.nxneti_tx_pbufpool
= pcb
->ipsec_netif_pp
;
1958 err
= kern_nexus_controller_alloc_net_provider_instance(controller
,
1959 pcb
->ipsec_nx
.if_provider
,
1961 &pcb
->ipsec_nx
.if_instance
,
1964 IPSEC_IF_VERIFY(err
== 0);
1966 os_log_error(OS_LOG_DEFAULT
, "%s alloc_net_provider_instance failed, %d\n",
/* On instance-allocation failure, roll back the provider registration. */
1968 kern_nexus_controller_deregister_provider(controller
,
1969 pcb
->ipsec_nx
.if_provider
);
1970 uuid_clear(pcb
->ipsec_nx
.if_provider
);
1976 kern_nexus_attr_destroy(nxa
);
/* Common failure cleanup: free the pbufpool if anything went wrong. */
1978 if (err
&& pcb
->ipsec_netif_pp
!= NULL
) {
1979 kern_pbufpool_destroy(pcb
->ipsec_netif_pp
);
1980 pcb
->ipsec_netif_pp
= NULL
;
/*
 * ipsec_detach_provider_and_instance
 * Frees a nexus provider instance and deregisters its provider, clearing
 * both UUIDs. Null UUIDs are skipped; failures are logged and otherwise
 * ignored (best-effort teardown).
 * NOTE(review): some argument and brace lines are missing from this extract.
 */
1986 ipsec_detach_provider_and_instance(uuid_t provider
, uuid_t instance
)
1988 nexus_controller_t controller
= kern_nexus_shared_controller();
1991 if (!uuid_is_null(instance
)) {
1992 err
= kern_nexus_controller_free_provider_instance(controller
,
1995 os_log_error(OS_LOG_DEFAULT
, "%s free_provider_instance failed %d\n",
1998 uuid_clear(instance
);
2000 if (!uuid_is_null(provider
)) {
2001 err
= kern_nexus_controller_deregister_provider(controller
,
2004 os_log_error(OS_LOG_DEFAULT
, "%s deregister_provider %d\n", __func__
, err
);
2006 uuid_clear(provider
);
/*
 * ipsec_nexus_detach
 * Tears down all nexus state for a pcb: detaches the flowswitch from the
 * host and device ports, detaches the netif and flowswitch providers and
 * instances, destroys the netif pbufpool, and zeroes the nexus bookkeeping.
 * NOTE(review): some argument lines are missing from this extract.
 */
2012 ipsec_nexus_detach(struct ipsec_pcb
*pcb
)
2014 ipsec_nx_t nx
= &pcb
->ipsec_nx
;
2015 nexus_controller_t controller
= kern_nexus_shared_controller();
/* Detach flowswitch host-side attachment, if present. */
2018 if (!uuid_is_null(nx
->fsw_host
)) {
2019 err
= kern_nexus_ifdetach(controller
,
2023 os_log_error(OS_LOG_DEFAULT
, "%s: kern_nexus_ifdetach ms host failed %d\n",
/* Detach flowswitch device-side attachment, if present. */
2028 if (!uuid_is_null(nx
->fsw_device
)) {
2029 err
= kern_nexus_ifdetach(controller
,
2033 os_log_error(OS_LOG_DEFAULT
, "%s: kern_nexus_ifdetach ms device failed %d\n",
2038 ipsec_detach_provider_and_instance(nx
->if_provider
,
2040 ipsec_detach_provider_and_instance(nx
->fsw_provider
,
2043 if (pcb
->ipsec_netif_pp
!= NULL
) {
2044 kern_pbufpool_destroy(pcb
->ipsec_netif_pp
);
2045 pcb
->ipsec_netif_pp
= NULL
;
/* Clear all nexus bookkeeping for this pcb. */
2047 memset(nx
, 0, sizeof(*nx
));
/*
 * ipsec_create_fs_provider_and_instance
 * Registers a flowswitch nexus provider (named after the interface) with
 * slot/ring-size attributes taken from the pcb, then allocates a provider
 * instance. Outputs the provider and instance UUIDs via the out-params.
 * NOTE(review): several argument and error-check lines are missing from
 * this extract; comments cover only the visible statements.
 */
2051 ipsec_create_fs_provider_and_instance(struct ipsec_pcb
*pcb
,
2052 const char *type_name
,
2054 uuid_t
*provider
, uuid_t
*instance
)
2056 nexus_attr_t attr
= NULL
;
2057 nexus_controller_t controller
= kern_nexus_shared_controller();
2060 struct kern_nexus_init init
;
2061 nexus_name_t provider_name
;
2063 err
= kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH
,
2065 IPSEC_IF_VERIFY(err
== 0);
2067 os_log_error(OS_LOG_DEFAULT
, "%s can't get %s provider, error %d\n",
2068 __func__
, type_name
, err
);
2072 err
= kern_nexus_attr_create(&attr
);
2073 IPSEC_IF_VERIFY(err
== 0);
2075 os_log_error(OS_LOG_DEFAULT
, "%s: kern_nexus_attr_create failed: %d\n",
2080 uint64_t slot_buffer_size
= pcb
->ipsec_slot_size
;
2081 err
= kern_nexus_attr_set(attr
, NEXUS_ATTR_SLOT_BUF_SIZE
, slot_buffer_size
);
2084 // Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
2085 uint64_t tx_ring_size
= pcb
->ipsec_tx_fsw_ring_size
;
2086 err
= kern_nexus_attr_set(attr
, NEXUS_ATTR_TX_SLOTS
, tx_ring_size
);
2088 uint64_t rx_ring_size
= pcb
->ipsec_rx_fsw_ring_size
;
2089 err
= kern_nexus_attr_set(attr
, NEXUS_ATTR_RX_SLOTS
, rx_ring_size
);
2092 * Configure flowswitch to use super-packet (multi-buflet).
2093 * This allows flowswitch to perform intra-stack packet aggregation.
2095 err
= kern_nexus_attr_set(attr
, NEXUS_ATTR_MAX_FRAGS
,
2096 sk_fsw_rx_agg_tcp
? NX_PBUF_FRAGS_MAX
: 1);
/* Register the provider under "com.apple.<type>.<ifname>". */
2099 snprintf((char *)provider_name
, sizeof(provider_name
),
2100 "com.apple.%s.%s", type_name
, ifname
);
2101 err
= kern_nexus_controller_register_provider(controller
,
2108 kern_nexus_attr_destroy(attr
);
2110 IPSEC_IF_VERIFY(err
== 0);
2112 os_log_error(OS_LOG_DEFAULT
, "%s register %s provider failed, error %d\n",
2113 __func__
, type_name
, err
);
2116 bzero(&init
, sizeof(init
));
2117 init
.nxi_version
= KERN_NEXUS_CURRENT_VERSION
;
2118 err
= kern_nexus_controller_alloc_provider_instance(controller
,
2122 IPSEC_IF_VERIFY(err
== 0);
2124 os_log_error(OS_LOG_DEFAULT
, "%s alloc_provider_instance %s failed, %d\n",
2125 __func__
, type_name
, err
);
/* On instance failure, roll back the provider registration. */
2126 kern_nexus_controller_deregister_provider(controller
,
2128 uuid_clear(*provider
);
/*
 * ipsec_flowswitch_attach
 * Creates a flowswitch provider/instance for the interface, attaches it to
 * both the device and host ports of the netif nexus, and caches the
 * flowswitch agent UUID. The visible failure path tears down all nexus
 * state and detaches the ifnet (panicking if the detach itself fails).
 * NOTE(review): several lines (returns, goto labels, braces) are missing
 * from this extract; comments cover only the visible statements.
 */
2135 ipsec_flowswitch_attach(struct ipsec_pcb
*pcb
)
2137 nexus_controller_t controller
= kern_nexus_shared_controller();
2139 ipsec_nx_t nx
= &pcb
->ipsec_nx
;
2141 // Allocate flowswitch
2142 err
= ipsec_create_fs_provider_and_instance(pcb
,
2144 pcb
->ipsec_ifp
->if_xname
,
2148 os_log_error(OS_LOG_DEFAULT
, "%s: failed to create bridge provider and instance\n",
2153 // Attach flowswitch to device port
2154 err
= kern_nexus_ifattach(controller
, nx
->fsw_instance
,
2155 NULL
, nx
->if_instance
,
2156 FALSE
, &nx
->fsw_device
);
2158 os_log_error(OS_LOG_DEFAULT
, "%s kern_nexus_ifattach ms device %d\n", __func__
, err
);
2162 // Attach flowswitch to host port
2163 err
= kern_nexus_ifattach(controller
, nx
->fsw_instance
,
2164 NULL
, nx
->if_instance
,
2165 TRUE
, &nx
->fsw_host
);
2167 os_log_error(OS_LOG_DEFAULT
, "%s kern_nexus_ifattach ms host %d\n", __func__
, err
);
2171 // Extract the agent UUID and save for later
2172 struct kern_nexus
*flowswitch_nx
= nx_find(nx
->fsw_instance
, false);
2173 if (flowswitch_nx
!= NULL
) {
2174 struct nx_flowswitch
*flowswitch
= NX_FSW_PRIVATE(flowswitch_nx
);
2175 if (flowswitch
!= NULL
) {
/* Agent UUID is read under the flowswitch read lock. */
2176 FSW_RLOCK(flowswitch
);
2177 uuid_copy(nx
->fsw_agent
, flowswitch
->fsw_agent_uuid
);
2178 FSW_UNLOCK(flowswitch
);
2180 os_log_error(OS_LOG_DEFAULT
, "ipsec_flowswitch_attach - flowswitch is NULL\n");
2182 nx_release(flowswitch_nx
);
2184 os_log_error(OS_LOG_DEFAULT
, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n");
/* Failure path: undo everything and detach the ifnet. */
2190 ipsec_nexus_detach(pcb
);
2192 errno_t detach_error
= 0;
2193 if ((detach_error
= ifnet_detach(pcb
->ipsec_ifp
)) != 0) {
2194 panic("ipsec_flowswitch_attach - ifnet_detach failed: %d\n", detach_error
);
2201 #pragma mark Kernel Pipe Nexus
/*
 * ipsec_register_kernel_pipe_nexus
 * Lazily creates the shared kernel-pipe nexus controller and registers the
 * ipsec kpipe provider, refcounted under ipsec_lock: only the first caller
 * performs the creation; later callers just bump the refcount.
 * NOTE(review): lines are missing from this extract (returns, goto labels,
 * error checks, the fallback ring-size constant); comments cover only the
 * visible statements.
 */
2204 ipsec_register_kernel_pipe_nexus(struct ipsec_pcb
*pcb
)
2206 nexus_attr_t nxa
= NULL
;
/* Fast path: already registered; just take a reference. */
2209 lck_mtx_lock(&ipsec_lock
);
2210 if (ipsec_ncd_refcount
++) {
2211 lck_mtx_unlock(&ipsec_lock
);
2215 result
= kern_nexus_controller_create(&ipsec_ncd
);
2217 os_log_error(OS_LOG_DEFAULT
, "%s: kern_nexus_controller_create failed: %d\n",
2218 __FUNCTION__
, result
);
2223 result
= kern_nexus_get_default_domain_provider(
2224 NEXUS_TYPE_KERNEL_PIPE
, &dom_prov
);
2226 os_log_error(OS_LOG_DEFAULT
, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
2227 __FUNCTION__
, result
);
/* Kpipe provider callbacks (ring lifecycle + sync; no doorbell). */
2231 struct kern_nexus_provider_init prov_init
= {
2232 .nxpi_version
= KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION
,
2233 .nxpi_flags
= NXPIF_VIRTUAL_DEVICE
,
2234 .nxpi_pre_connect
= ipsec_nexus_pre_connect
,
2235 .nxpi_connected
= ipsec_nexus_connected
,
2236 .nxpi_pre_disconnect
= ipsec_nexus_pre_disconnect
,
2237 .nxpi_disconnected
= ipsec_nexus_disconnected
,
2238 .nxpi_ring_init
= ipsec_kpipe_ring_init
,
2239 .nxpi_ring_fini
= ipsec_kpipe_ring_fini
,
2240 .nxpi_slot_init
= NULL
,
2241 .nxpi_slot_fini
= NULL
,
2242 .nxpi_sync_tx
= ipsec_kpipe_sync_tx
,
2243 .nxpi_sync_rx
= ipsec_kpipe_sync_rx
,
2244 .nxpi_tx_doorbell
= NULL
,
2247 result
= kern_nexus_attr_create(&nxa
);
2249 os_log_error(OS_LOG_DEFAULT
, "%s: kern_nexus_attr_create failed: %d\n",
2250 __FUNCTION__
, result
);
2254 uint64_t slot_buffer_size
= IPSEC_IF_DEFAULT_SLOT_SIZE
;
2255 result
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_SLOT_BUF_SIZE
, slot_buffer_size
);
2256 VERIFY(result
== 0);
2258 // Reset ring size for kernel pipe nexus to limit memory usage
2259 // Note: It's better to have less on slots on the kpipe TX ring than the netif
2260 // so back pressure is applied at the AQM layer
/* Ring sizes prefer the pcb's explicit kpipe sizes, then the netif size. */
2261 uint64_t ring_size
=
2262 pcb
->ipsec_kpipe_tx_ring_size
!= 0 ? pcb
->ipsec_kpipe_tx_ring_size
:
2263 pcb
->ipsec_netif_ring_size
!= 0 ? pcb
->ipsec_netif_ring_size
:
2265 result
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_TX_SLOTS
, ring_size
);
2266 VERIFY(result
== 0);
2269 pcb
->ipsec_kpipe_rx_ring_size
!= 0 ? pcb
->ipsec_kpipe_rx_ring_size
:
2270 pcb
->ipsec_netif_ring_size
!= 0 ? pcb
->ipsec_netif_ring_size
:
2272 result
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_RX_SLOTS
, ring_size
);
2273 VERIFY(result
== 0);
2275 result
= kern_nexus_controller_register_provider(ipsec_ncd
,
2277 (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
2283 os_log_error(OS_LOG_DEFAULT
, "%s: kern_nexus_controller_register_provider failed: %d\n",
2284 __FUNCTION__
, result
);
2290 kern_nexus_attr_destroy(nxa
);
/* Failure cleanup: destroy the controller and drop the refcount to zero. */
2295 kern_nexus_controller_destroy(ipsec_ncd
);
2298 ipsec_ncd_refcount
= 0;
2301 lck_mtx_unlock(&ipsec_lock
);
2307 ipsec_unregister_kernel_pipe_nexus(void)
2309 lck_mtx_lock(&ipsec_lock
);
2311 VERIFY(ipsec_ncd_refcount
> 0);
2313 if (--ipsec_ncd_refcount
== 0) {
2314 kern_nexus_controller_destroy(ipsec_ncd
);
2318 lck_mtx_unlock(&ipsec_lock
);
2321 /* This structure only holds onto kpipe channels that need to be
2322 * freed in the future, but are cleared from the pcb under lock
2324 struct ipsec_detached_channels
{
2327 uuid_t uuids
[IPSEC_IF_MAX_RING_COUNT
];
2331 ipsec_detach_channels(struct ipsec_pcb
*pcb
, struct ipsec_detached_channels
*dc
)
2333 LCK_RW_ASSERT(&pcb
->ipsec_pcb_lock
, LCK_RW_TYPE_EXCLUSIVE
);
2335 if (!ipsec_flag_isset(pcb
, IPSEC_FLAGS_KPIPE_ALLOCATED
)) {
2336 for (int i
= 0; i
< IPSEC_IF_MAX_RING_COUNT
; i
++) {
2337 VERIFY(uuid_is_null(pcb
->ipsec_kpipe_uuid
[i
]));
2343 dc
->count
= pcb
->ipsec_kpipe_count
;
2345 VERIFY(dc
->count
>= 0);
2346 VERIFY(dc
->count
<= IPSEC_IF_MAX_RING_COUNT
);
2348 for (int i
= 0; i
< dc
->count
; i
++) {
2349 VERIFY(!uuid_is_null(pcb
->ipsec_kpipe_uuid
[i
]));
2350 uuid_copy(dc
->uuids
[i
], pcb
->ipsec_kpipe_uuid
[i
]);
2351 uuid_clear(pcb
->ipsec_kpipe_uuid
[i
]);
2353 for (int i
= dc
->count
; i
< IPSEC_IF_MAX_RING_COUNT
; i
++) {
2354 VERIFY(uuid_is_null(pcb
->ipsec_kpipe_uuid
[i
]));
2358 VERIFY(pcb
->ipsec_kpipe_pp
);
2360 VERIFY(!pcb
->ipsec_kpipe_pp
);
2363 dc
->pp
= pcb
->ipsec_kpipe_pp
;
2365 pcb
->ipsec_kpipe_pp
= NULL
;
2367 ipsec_flag_clr(pcb
, IPSEC_FLAGS_KPIPE_ALLOCATED
);
2371 ipsec_free_channels(struct ipsec_detached_channels
*dc
)
2377 for (int i
= 0; i
< dc
->count
; i
++) {
2379 result
= kern_nexus_controller_free_provider_instance(ipsec_ncd
, dc
->uuids
[i
]);
2384 kern_pbufpool_destroy(dc
->pp
);
2386 ipsec_unregister_kernel_pipe_nexus();
2388 memset(dc
, 0, sizeof(*dc
));
2392 ipsec_enable_channel(struct ipsec_pcb
*pcb
, struct proc
*proc
)
2394 struct kern_nexus_init init
;
2395 struct kern_pbufpool_init pp_init
;
2398 kauth_cred_t cred
= kauth_cred_get();
2399 result
= priv_check_cred(cred
, PRIV_SKYWALK_REGISTER_KERNEL_PIPE
, 0);
2404 VERIFY(pcb
->ipsec_kpipe_count
);
2405 VERIFY(!ipsec_flag_isset(pcb
, IPSEC_FLAGS_KPIPE_ALLOCATED
));
2407 result
= ipsec_register_kernel_pipe_nexus(pcb
);
2409 lck_rw_lock_exclusive(&pcb
->ipsec_pcb_lock
);
2412 os_log_error(OS_LOG_DEFAULT
, "%s: %s failed to register kernel pipe nexus\n",
2413 __func__
, pcb
->ipsec_if_xname
);
2419 bzero(&pp_init
, sizeof(pp_init
));
2420 pp_init
.kbi_version
= KERN_PBUFPOOL_CURRENT_VERSION
;
2421 pp_init
.kbi_flags
|= KBIF_VIRTUAL_DEVICE
;
2422 // Note: We only needs are many packets as can be held in the tx and rx rings
2423 pp_init
.kbi_packets
= pcb
->ipsec_netif_ring_size
* 2 * pcb
->ipsec_kpipe_count
;
2424 pp_init
.kbi_bufsize
= pcb
->ipsec_slot_size
;
2425 pp_init
.kbi_buf_seg_size
= IPSEC_IF_DEFAULT_BUF_SEG_SIZE
;
2426 pp_init
.kbi_max_frags
= 1;
2427 pp_init
.kbi_flags
|= KBIF_QUANTUM
;
2428 (void) snprintf((char *)pp_init
.kbi_name
, sizeof(pp_init
.kbi_name
),
2429 "com.apple.kpipe.%s", pcb
->ipsec_if_xname
);
2430 pp_init
.kbi_ctx
= NULL
;
2431 pp_init
.kbi_ctx_retain
= NULL
;
2432 pp_init
.kbi_ctx_release
= NULL
;
2434 result
= kern_pbufpool_create(&pp_init
, &pcb
->ipsec_kpipe_pp
,
2437 os_log_error(OS_LOG_DEFAULT
, "%s: %s pbufbool create failed, error %d\n",
2438 __func__
, pcb
->ipsec_if_xname
, result
);
2442 bzero(&init
, sizeof(init
));
2443 init
.nxi_version
= KERN_NEXUS_CURRENT_VERSION
;
2444 init
.nxi_tx_pbufpool
= pcb
->ipsec_kpipe_pp
;
2446 for (unsigned int i
= 0; i
< pcb
->ipsec_kpipe_count
; i
++) {
2447 VERIFY(uuid_is_null(pcb
->ipsec_kpipe_uuid
[i
]));
2448 result
= kern_nexus_controller_alloc_provider_instance(ipsec_ncd
,
2449 ipsec_kpipe_uuid
, pcb
, &pcb
->ipsec_kpipe_uuid
[i
], &init
);
2452 nexus_port_t port
= NEXUS_PORT_KERNEL_PIPE_CLIENT
;
2453 pid_t pid
= pcb
->ipsec_kpipe_pid
;
2455 pid
= proc_pid(proc
);
2457 result
= kern_nexus_controller_bind_provider_instance(ipsec_ncd
,
2458 pcb
->ipsec_kpipe_uuid
[i
], &port
,
2459 pid
, NULL
, NULL
, 0, NEXUS_BIND_PID
);
2463 /* Unwind all of them on error */
2464 for (int j
= 0; j
< IPSEC_IF_MAX_RING_COUNT
; j
++) {
2465 if (!uuid_is_null(pcb
->ipsec_kpipe_uuid
[j
])) {
2466 kern_nexus_controller_free_provider_instance(ipsec_ncd
,
2467 pcb
->ipsec_kpipe_uuid
[j
]);
2468 uuid_clear(pcb
->ipsec_kpipe_uuid
[j
]);
2476 lck_rw_unlock_exclusive(&pcb
->ipsec_pcb_lock
);
2479 if (pcb
->ipsec_kpipe_pp
!= NULL
) {
2480 kern_pbufpool_destroy(pcb
->ipsec_kpipe_pp
);
2481 pcb
->ipsec_kpipe_pp
= NULL
;
2483 ipsec_unregister_kernel_pipe_nexus();
2485 ipsec_flag_set(pcb
, IPSEC_FLAGS_KPIPE_ALLOCATED
);
2491 #endif // IPSEC_NEXUS
2494 /* Kernel control functions */
2497 ipsec_find_by_unit(u_int32_t unit
)
2499 struct ipsec_pcb
*next_pcb
= NULL
;
2502 TAILQ_FOREACH(next_pcb
, &ipsec_head
, ipsec_chain
) {
2503 if (next_pcb
->ipsec_unit
== unit
) {
2513 ipsec_free_pcb(struct ipsec_pcb
*pcb
, bool locked
)
2516 mbuf_freem_list(pcb
->ipsec_input_chain
);
2517 pcb
->ipsec_input_chain_count
= 0;
2518 lck_mtx_destroy(&pcb
->ipsec_input_chain_lock
, ipsec_lck_grp
);
2519 lck_mtx_destroy(&pcb
->ipsec_kpipe_encrypt_lock
, ipsec_lck_grp
);
2520 lck_mtx_destroy(&pcb
->ipsec_kpipe_decrypt_lock
, ipsec_lck_grp
);
2521 #endif // IPSEC_NEXUS
2522 lck_mtx_destroy(&pcb
->ipsec_pcb_data_move_lock
, ipsec_lck_grp
);
2523 lck_rw_destroy(&pcb
->ipsec_pcb_lock
, ipsec_lck_grp
);
2525 lck_mtx_lock(&ipsec_lock
);
2527 TAILQ_REMOVE(&ipsec_head
, pcb
, ipsec_chain
);
2529 lck_mtx_unlock(&ipsec_lock
);
2531 zfree(ipsec_pcb_zone
, pcb
);
2535 ipsec_ctl_setup(u_int32_t
*unit
, void **unitinfo
)
2537 if (unit
== NULL
|| unitinfo
== NULL
) {
2541 lck_mtx_lock(&ipsec_lock
);
2543 /* Find next available unit */
2546 while (*unit
!= ctl_maxunit
) {
2547 if (ipsec_find_by_unit(*unit
)) {
2553 if (*unit
== ctl_maxunit
) {
2554 lck_mtx_unlock(&ipsec_lock
);
2557 } else if (ipsec_find_by_unit(*unit
)) {
2558 lck_mtx_unlock(&ipsec_lock
);
2562 /* Find some open interface id */
2563 u_int32_t chosen_unique_id
= 1;
2564 struct ipsec_pcb
*next_pcb
= TAILQ_LAST(&ipsec_head
, ipsec_list
);
2565 if (next_pcb
!= NULL
) {
2566 /* List was not empty, add one to the last item */
2567 chosen_unique_id
= next_pcb
->ipsec_unique_id
+ 1;
2571 * If this wrapped the id number, start looking at
2572 * the front of the list for an unused id.
2574 if (chosen_unique_id
== 0) {
2575 /* Find the next unused ID */
2576 chosen_unique_id
= 1;
2577 TAILQ_FOREACH(next_pcb
, &ipsec_head
, ipsec_chain
) {
2578 if (next_pcb
->ipsec_unique_id
> chosen_unique_id
) {
2579 /* We found a gap */
2583 chosen_unique_id
= next_pcb
->ipsec_unique_id
+ 1;
2588 struct ipsec_pcb
*pcb
= zalloc_flags(ipsec_pcb_zone
, Z_WAITOK
| Z_ZERO
);
2591 pcb
->ipsec_unit
= *unit
;
2592 pcb
->ipsec_unique_id
= chosen_unique_id
;
2594 if (next_pcb
!= NULL
) {
2595 TAILQ_INSERT_BEFORE(next_pcb
, pcb
, ipsec_chain
);
2597 TAILQ_INSERT_TAIL(&ipsec_head
, pcb
, ipsec_chain
);
2600 lck_mtx_unlock(&ipsec_lock
);
2606 ipsec_ctl_bind(kern_ctl_ref kctlref
,
2607 struct sockaddr_ctl
*sac
,
2610 if (*unitinfo
== NULL
) {
2612 (void)ipsec_ctl_setup(&unit
, unitinfo
);
2615 struct ipsec_pcb
*pcb
= (struct ipsec_pcb
*)*unitinfo
;
2620 /* Setup the protocol control block */
2621 pcb
->ipsec_ctlref
= kctlref
;
2622 pcb
->ipsec_unit
= sac
->sc_unit
;
2623 pcb
->ipsec_output_service_class
= MBUF_SC_OAM
;
2626 pcb
->ipsec_use_netif
= false;
2627 pcb
->ipsec_slot_size
= IPSEC_IF_DEFAULT_SLOT_SIZE
;
2628 pcb
->ipsec_netif_ring_size
= if_ipsec_ring_size
;
2629 pcb
->ipsec_tx_fsw_ring_size
= if_ipsec_tx_fsw_ring_size
;
2630 pcb
->ipsec_rx_fsw_ring_size
= if_ipsec_rx_fsw_ring_size
;
2631 #endif // IPSEC_NEXUS
2633 lck_rw_init(&pcb
->ipsec_pcb_lock
, ipsec_lck_grp
, ipsec_lck_attr
);
2634 lck_mtx_init(&pcb
->ipsec_pcb_data_move_lock
, ipsec_lck_grp
, ipsec_lck_attr
);
2636 pcb
->ipsec_input_chain_count
= 0;
2637 lck_mtx_init(&pcb
->ipsec_input_chain_lock
, ipsec_lck_grp
, ipsec_lck_attr
);
2638 lck_mtx_init(&pcb
->ipsec_kpipe_encrypt_lock
, ipsec_lck_grp
, ipsec_lck_attr
);
2639 lck_mtx_init(&pcb
->ipsec_kpipe_decrypt_lock
, ipsec_lck_grp
, ipsec_lck_attr
);
2640 #endif // IPSEC_NEXUS
2646 ipsec_ctl_connect(kern_ctl_ref kctlref
,
2647 struct sockaddr_ctl
*sac
,
2650 struct ifnet_init_eparams ipsec_init
= {};
2653 if (*unitinfo
== NULL
) {
2654 (void)ipsec_ctl_bind(kctlref
, sac
, unitinfo
);
2657 struct ipsec_pcb
*pcb
= *unitinfo
;
2662 /* Handle case where ipsec_ctl_setup() was called, but ipsec_ctl_bind() was not */
2663 if (pcb
->ipsec_ctlref
== NULL
) {
2664 (void)ipsec_ctl_bind(kctlref
, sac
, unitinfo
);
2667 snprintf(pcb
->ipsec_if_xname
, sizeof(pcb
->ipsec_if_xname
), "ipsec%d", pcb
->ipsec_unit
- 1);
2668 snprintf(pcb
->ipsec_unique_name
, sizeof(pcb
->ipsec_unique_name
), "ipsecid%d", pcb
->ipsec_unique_id
- 1);
2669 os_log(OS_LOG_DEFAULT
, "ipsec_ctl_connect: creating interface %s (id %s)\n", pcb
->ipsec_if_xname
, pcb
->ipsec_unique_name
);
2671 /* Create the interface */
2672 bzero(&ipsec_init
, sizeof(ipsec_init
));
2673 ipsec_init
.ver
= IFNET_INIT_CURRENT_VERSION
;
2674 ipsec_init
.len
= sizeof(ipsec_init
);
2677 if (pcb
->ipsec_use_netif
) {
2678 ipsec_init
.flags
= (IFNET_INIT_SKYWALK_NATIVE
| IFNET_INIT_NX_NOAUTO
);
2680 #endif // IPSEC_NEXUS
2682 ipsec_init
.flags
= IFNET_INIT_NX_NOAUTO
;
2683 ipsec_init
.start
= ipsec_start
;
2685 ipsec_init
.name
= "ipsec";
2686 ipsec_init
.unit
= pcb
->ipsec_unit
- 1;
2687 ipsec_init
.uniqueid
= pcb
->ipsec_unique_name
;
2688 ipsec_init
.uniqueid_len
= strlen(pcb
->ipsec_unique_name
);
2689 ipsec_init
.family
= IFNET_FAMILY_IPSEC
;
2690 ipsec_init
.type
= IFT_OTHER
;
2691 ipsec_init
.demux
= ipsec_demux
;
2692 ipsec_init
.add_proto
= ipsec_add_proto
;
2693 ipsec_init
.del_proto
= ipsec_del_proto
;
2694 ipsec_init
.softc
= pcb
;
2695 ipsec_init
.ioctl
= ipsec_ioctl
;
2696 ipsec_init
.free
= ipsec_detached
;
2699 /* We don't support kpipes without a netif */
2700 if (pcb
->ipsec_kpipe_count
&& !pcb
->ipsec_use_netif
) {
2702 os_log_error(OS_LOG_DEFAULT
, "ipsec_ctl_connect - kpipe requires netif: failed %d\n", result
);
2703 ipsec_free_pcb(pcb
, false);
2708 if (if_ipsec_debug
!= 0) {
2709 printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u "
2710 "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n",
2712 ipsec_init
.name
, ipsec_init
.unit
,
2713 pcb
->ipsec_use_netif
,
2714 pcb
->ipsec_kpipe_count
,
2715 pcb
->ipsec_slot_size
,
2716 pcb
->ipsec_netif_ring_size
,
2717 pcb
->ipsec_kpipe_tx_ring_size
,
2718 pcb
->ipsec_kpipe_rx_ring_size
);
2720 if (pcb
->ipsec_use_netif
) {
2721 if (pcb
->ipsec_kpipe_count
) {
2722 result
= ipsec_enable_channel(pcb
, current_proc());
2724 os_log_error(OS_LOG_DEFAULT
, "%s: %s failed to enable channels\n",
2725 __func__
, pcb
->ipsec_if_xname
);
2726 ipsec_free_pcb(pcb
, false);
2732 result
= ipsec_nexus_ifattach(pcb
, &ipsec_init
, &pcb
->ipsec_ifp
);
2734 os_log_error(OS_LOG_DEFAULT
, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result
);
2735 ipsec_free_pcb(pcb
, false);
2740 result
= ipsec_flowswitch_attach(pcb
);
2742 os_log_error(OS_LOG_DEFAULT
, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n", result
);
2743 // Do not call ipsec_free_pcb(). We will be attached already, and will be freed later
2744 // in ipsec_detached().
2750 bpfattach(pcb
->ipsec_ifp
, DLT_RAW
, 0);
2752 #endif // IPSEC_NEXUS
2754 result
= ifnet_allocate_extended(&ipsec_init
, &pcb
->ipsec_ifp
);
2756 os_log_error(OS_LOG_DEFAULT
, "ipsec_ctl_connect - ifnet_allocate failed: %d\n", result
);
2757 ipsec_free_pcb(pcb
, false);
2761 ipsec_ifnet_set_attrs(pcb
->ipsec_ifp
);
2763 /* Attach the interface */
2764 result
= ifnet_attach(pcb
->ipsec_ifp
, NULL
);
2766 os_log_error(OS_LOG_DEFAULT
, "ipsec_ctl_connect - ifnet_attach failed: %d\n", result
);
2767 ifnet_release(pcb
->ipsec_ifp
);
2768 ipsec_free_pcb(pcb
, false);
2774 bpfattach(pcb
->ipsec_ifp
, DLT_NULL
, 0);
2779 * Mark the data path as ready.
2780 * If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected.
2782 if (pcb
->ipsec_kpipe_count
== 0) {
2783 lck_mtx_lock(&pcb
->ipsec_pcb_data_move_lock
);
2784 IPSEC_SET_DATA_PATH_READY(pcb
);
2785 lck_mtx_unlock(&pcb
->ipsec_pcb_data_move_lock
);
2789 /* The interfaces resoures allocated, mark it as running */
2790 ifnet_set_flags(pcb
->ipsec_ifp
, IFF_RUNNING
, IFF_RUNNING
);
2796 ipsec_detach_ip(ifnet_t interface
,
2797 protocol_family_t protocol
,
2800 errno_t result
= EPROTONOSUPPORT
;
2802 /* Attempt a detach */
2803 if (protocol
== PF_INET
) {
2806 bzero(&ifr
, sizeof(ifr
));
2807 snprintf(ifr
.ifr_name
, sizeof(ifr
.ifr_name
), "%s%d",
2808 ifnet_name(interface
), ifnet_unit(interface
));
2810 result
= sock_ioctl(pf_socket
, SIOCPROTODETACH
, &ifr
);
2811 } else if (protocol
== PF_INET6
) {
2812 struct in6_ifreq ifr6
;
2814 bzero(&ifr6
, sizeof(ifr6
));
2815 snprintf(ifr6
.ifr_name
, sizeof(ifr6
.ifr_name
), "%s%d",
2816 ifnet_name(interface
), ifnet_unit(interface
));
2818 result
= sock_ioctl(pf_socket
, SIOCPROTODETACH_IN6
, &ifr6
);
2825 ipsec_remove_address(ifnet_t interface
,
2826 protocol_family_t protocol
,
2832 /* Attempt a detach */
2833 if (protocol
== PF_INET
) {
2836 bzero(&ifr
, sizeof(ifr
));
2837 snprintf(ifr
.ifr_name
, sizeof(ifr
.ifr_name
), "%s%d",
2838 ifnet_name(interface
), ifnet_unit(interface
));
2839 result
= ifaddr_address(address
, &ifr
.ifr_addr
, sizeof(ifr
.ifr_addr
));
2841 os_log_error(OS_LOG_DEFAULT
, "ipsec_remove_address - ifaddr_address failed: %d", result
);
2843 result
= sock_ioctl(pf_socket
, SIOCDIFADDR
, &ifr
);
2845 os_log_error(OS_LOG_DEFAULT
, "ipsec_remove_address - SIOCDIFADDR failed: %d", result
);
2848 } else if (protocol
== PF_INET6
) {
2849 struct in6_ifreq ifr6
;
2851 bzero(&ifr6
, sizeof(ifr6
));
2852 snprintf(ifr6
.ifr_name
, sizeof(ifr6
.ifr_name
), "%s%d",
2853 ifnet_name(interface
), ifnet_unit(interface
));
2854 result
= ifaddr_address(address
, (struct sockaddr
*)&ifr6
.ifr_addr
,
2855 sizeof(ifr6
.ifr_addr
));
2857 os_log_error(OS_LOG_DEFAULT
, "ipsec_remove_address - ifaddr_address failed (v6): %d",
2860 result
= sock_ioctl(pf_socket
, SIOCDIFADDR_IN6
, &ifr6
);
2862 os_log_error(OS_LOG_DEFAULT
, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
2870 ipsec_cleanup_family(ifnet_t interface
,
2871 protocol_family_t protocol
)
2874 socket_t pf_socket
= NULL
;
2875 ifaddr_t
*addresses
= NULL
;
2878 if (protocol
!= PF_INET
&& protocol
!= PF_INET6
) {
2879 os_log_error(OS_LOG_DEFAULT
, "ipsec_cleanup_family - invalid protocol family %d\n", protocol
);
2883 /* Create a socket for removing addresses and detaching the protocol */
2884 result
= sock_socket(protocol
, SOCK_DGRAM
, 0, NULL
, NULL
, &pf_socket
);
2886 if (result
!= EAFNOSUPPORT
) {
2887 os_log_error(OS_LOG_DEFAULT
, "ipsec_cleanup_family - failed to create %s socket: %d\n",
2888 protocol
== PF_INET
? "IP" : "IPv6", result
);
2893 /* always set SS_PRIV, we want to close and detach regardless */
2894 sock_setpriv(pf_socket
, 1);
2896 result
= ipsec_detach_ip(interface
, protocol
, pf_socket
);
2897 if (result
== 0 || result
== ENXIO
) {
2898 /* We are done! We either detached or weren't attached. */
2900 } else if (result
!= EBUSY
) {
2901 /* Uh, not really sure what happened here... */
2902 os_log_error(OS_LOG_DEFAULT
, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result
);
2907 * At this point, we received an EBUSY error. This means there are
2908 * addresses attached. We should detach them and then try again.
2910 result
= ifnet_get_address_list_family(interface
, &addresses
, protocol
);
2912 os_log_error(OS_LOG_DEFAULT
, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2913 ifnet_name(interface
), ifnet_unit(interface
),
2914 protocol
== PF_INET
? "PF_INET" : "PF_INET6", result
);
2918 for (i
= 0; addresses
[i
] != 0; i
++) {
2919 ipsec_remove_address(interface
, protocol
, addresses
[i
], pf_socket
);
2921 ifnet_free_address_list(addresses
);
2925 * The addresses should be gone, we should try the remove again.
2927 result
= ipsec_detach_ip(interface
, protocol
, pf_socket
);
2928 if (result
!= 0 && result
!= ENXIO
) {
2929 os_log_error(OS_LOG_DEFAULT
, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result
);
2933 if (pf_socket
!= NULL
) {
2934 sock_close(pf_socket
);
2937 if (addresses
!= NULL
) {
2938 ifnet_free_address_list(addresses
);
2943 ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref
,
2944 __unused u_int32_t unit
,
2947 struct ipsec_pcb
*pcb
= unitinfo
;
2955 /* Wait until all threads in the data paths are done. */
2956 ipsec_wait_data_move_drain(pcb
);
2959 // Tell the nexus to stop all rings
2960 if (pcb
->ipsec_netif_nexus
!= NULL
) {
2961 kern_nexus_stop(pcb
->ipsec_netif_nexus
);
2963 #endif // IPSEC_NEXUS
2965 lck_rw_lock_exclusive(&pcb
->ipsec_pcb_lock
);
2968 if (if_ipsec_debug
!= 0) {
2969 printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n",
2970 pcb
->ipsec_if_xname
, pcb
->ipsec_unique_name
);
2973 struct ipsec_detached_channels dc
;
2974 ipsec_detach_channels(pcb
, &dc
);
2975 #endif // IPSEC_NEXUS
2977 pcb
->ipsec_ctlref
= NULL
;
2979 ifp
= pcb
->ipsec_ifp
;
2982 if (pcb
->ipsec_netif_nexus
!= NULL
) {
2984 * Quiesce the interface and flush any pending outbound packets.
2988 /* Increment refcnt, but detach interface */
2989 ifnet_incr_iorefcnt(ifp
);
2990 if ((result
= ifnet_detach(ifp
)) != 0) {
2991 panic("ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result
);
2996 * We want to do everything in our power to ensure that the interface
2997 * really goes away when the socket is closed. We must remove IP/IPv6
2998 * addresses and detach the protocols. Finally, we can remove and
2999 * release the interface.
3001 key_delsp_for_ipsec_if(ifp
);
3003 ipsec_cleanup_family(ifp
, AF_INET
);
3004 ipsec_cleanup_family(ifp
, AF_INET6
);
3006 lck_rw_unlock_exclusive(&pcb
->ipsec_pcb_lock
);
3008 ipsec_free_channels(&dc
);
3010 ipsec_nexus_detach(pcb
);
3012 /* Decrement refcnt to finish detaching and freeing */
3013 ifnet_decr_iorefcnt(ifp
);
3015 #endif // IPSEC_NEXUS
3017 lck_rw_unlock_exclusive(&pcb
->ipsec_pcb_lock
);
3020 ipsec_free_channels(&dc
);
3021 #endif // IPSEC_NEXUS
3024 * We want to do everything in our power to ensure that the interface
3025 * really goes away when the socket is closed. We must remove IP/IPv6
3026 * addresses and detach the protocols. Finally, we can remove and
3027 * release the interface.
3029 key_delsp_for_ipsec_if(ifp
);
3031 ipsec_cleanup_family(ifp
, AF_INET
);
3032 ipsec_cleanup_family(ifp
, AF_INET6
);
3035 * Detach now; ipsec_detach() will be called asynchronously once
3036 * the I/O reference count drops to 0. There we will invoke
3039 if ((result
= ifnet_detach(ifp
)) != 0) {
3040 os_log_error(OS_LOG_DEFAULT
, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result
);
3044 // Bound, but not connected
3045 lck_rw_unlock_exclusive(&pcb
->ipsec_pcb_lock
);
3046 ipsec_free_pcb(pcb
, false);
3053 ipsec_ctl_send(__unused kern_ctl_ref kctlref
,
3054 __unused u_int32_t unit
,
3055 __unused
void *unitinfo
,
3059 /* Receive messages from the control socket. Currently unused. */
3065 ipsec_ctl_setopt(__unused kern_ctl_ref kctlref
,
3066 __unused u_int32_t unit
,
3073 struct ipsec_pcb
*pcb
= unitinfo
;
3078 /* check for privileges for privileged options */
3080 case IPSEC_OPT_FLAGS
:
3081 case IPSEC_OPT_EXT_IFDATA_STATS
:
3082 case IPSEC_OPT_SET_DELEGATE_INTERFACE
:
3083 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS
:
3084 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3091 case IPSEC_OPT_FLAGS
: {
3092 if (len
!= sizeof(u_int32_t
)) {
3095 pcb
->ipsec_external_flags
= *(u_int32_t
*)data
;
3100 case IPSEC_OPT_EXT_IFDATA_STATS
: {
3101 if (len
!= sizeof(int)) {
3105 if (pcb
->ipsec_ifp
== NULL
) {
3106 // Only can set after connecting
3110 pcb
->ipsec_ext_ifdata_stats
= (*(int *)data
) ? 1 : 0;
3114 case IPSEC_OPT_INC_IFDATA_STATS_IN
:
3115 case IPSEC_OPT_INC_IFDATA_STATS_OUT
: {
3116 struct ipsec_stats_param
*utsp
= (struct ipsec_stats_param
*)data
;
3118 if (utsp
== NULL
|| len
< sizeof(struct ipsec_stats_param
)) {
3122 if (pcb
->ipsec_ifp
== NULL
) {
3123 // Only can set after connecting
3127 if (!pcb
->ipsec_ext_ifdata_stats
) {
3131 if (opt
== IPSEC_OPT_INC_IFDATA_STATS_IN
) {
3132 ifnet_stat_increment_in(pcb
->ipsec_ifp
, utsp
->utsp_packets
,
3133 utsp
->utsp_bytes
, utsp
->utsp_errors
);
3135 ifnet_stat_increment_out(pcb
->ipsec_ifp
, utsp
->utsp_packets
,
3136 utsp
->utsp_bytes
, utsp
->utsp_errors
);
3141 case IPSEC_OPT_SET_DELEGATE_INTERFACE
: {
3142 ifnet_t del_ifp
= NULL
;
3143 char name
[IFNAMSIZ
];
3145 if (len
> IFNAMSIZ
- 1) {
3149 if (pcb
->ipsec_ifp
== NULL
) {
3150 // Only can set after connecting
3154 if (len
!= 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */
3155 bcopy(data
, name
, len
);
3157 result
= ifnet_find_by_name(name
, &del_ifp
);
3160 os_log_error(OS_LOG_DEFAULT
, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
3161 __func__
, pcb
->ipsec_ifp
->if_xname
,
3162 del_ifp
? del_ifp
->if_xname
: "NULL");
3164 result
= ifnet_set_delegate(pcb
->ipsec_ifp
, del_ifp
);
3166 ifnet_release(del_ifp
);
3172 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS
: {
3173 if (len
!= sizeof(int)) {
3177 if (pcb
->ipsec_ifp
== NULL
) {
3178 // Only can set after connecting
3182 mbuf_svc_class_t output_service_class
= so_tc2msc(*(int *)data
);
3183 if (output_service_class
== MBUF_SC_UNSPEC
) {
3184 pcb
->ipsec_output_service_class
= MBUF_SC_OAM
;
3186 pcb
->ipsec_output_service_class
= output_service_class
;
3188 os_log_error(OS_LOG_DEFAULT
, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
3189 __func__
, pcb
->ipsec_ifp
->if_xname
,
3190 pcb
->ipsec_output_service_class
);
3195 case IPSEC_OPT_ENABLE_CHANNEL
: {
3196 if (len
!= sizeof(int)) {
3200 if (pcb
->ipsec_ifp
!= NULL
) {
3201 // Only can set before connecting
3205 if ((*(int *)data
) != 0 &&
3206 (*(int *)data
) != 1 &&
3207 (*(int *)data
) != IPSEC_IF_WMM_RING_COUNT
) {
3211 lck_rw_lock_exclusive(&pcb
->ipsec_pcb_lock
);
3212 pcb
->ipsec_kpipe_count
= *(int *)data
;
3213 lck_rw_unlock_exclusive(&pcb
->ipsec_pcb_lock
);
3217 case IPSEC_OPT_CHANNEL_BIND_PID
: {
3218 if (len
!= sizeof(pid_t
)) {
3222 if (pcb
->ipsec_ifp
!= NULL
) {
3223 // Only can set before connecting
3227 lck_rw_lock_exclusive(&pcb
->ipsec_pcb_lock
);
3228 pcb
->ipsec_kpipe_pid
= *(pid_t
*)data
;
3229 lck_rw_unlock_exclusive(&pcb
->ipsec_pcb_lock
);
3233 case IPSEC_OPT_ENABLE_FLOWSWITCH
: {
3234 if (len
!= sizeof(int)) {
3238 if (pcb
->ipsec_ifp
== NULL
) {
3239 // Only can set after connecting
3243 if (!if_is_fsw_transport_netagent_enabled()) {
3247 if (uuid_is_null(pcb
->ipsec_nx
.fsw_agent
)) {
3252 uint32_t flags
= netagent_get_flags(pcb
->ipsec_nx
.fsw_agent
);
3255 flags
|= (NETAGENT_FLAG_NEXUS_PROVIDER
|
3256 NETAGENT_FLAG_NEXUS_LISTENER
);
3257 result
= netagent_set_flags(pcb
->ipsec_nx
.fsw_agent
, flags
);
3258 pcb
->ipsec_needs_netagent
= true;
3260 pcb
->ipsec_needs_netagent
= false;
3261 flags
&= ~(NETAGENT_FLAG_NEXUS_PROVIDER
|
3262 NETAGENT_FLAG_NEXUS_LISTENER
);
3263 result
= netagent_set_flags(pcb
->ipsec_nx
.fsw_agent
, flags
);
3268 case IPSEC_OPT_INPUT_FRAG_SIZE
: {
3269 if (len
!= sizeof(u_int32_t
)) {
3273 u_int32_t input_frag_size
= *(u_int32_t
*)data
;
3274 if (input_frag_size
<= sizeof(struct ip6_hdr
)) {
3275 pcb
->ipsec_frag_size_set
= FALSE
;
3276 pcb
->ipsec_input_frag_size
= 0;
3278 pcb
->ipsec_frag_size_set
= TRUE
;
3279 pcb
->ipsec_input_frag_size
= input_frag_size
;
3283 case IPSEC_OPT_ENABLE_NETIF
: {
3284 if (len
!= sizeof(int)) {
3288 if (pcb
->ipsec_ifp
!= NULL
) {
3289 // Only can set before connecting
3293 lck_rw_lock_exclusive(&pcb
->ipsec_pcb_lock
);
3294 pcb
->ipsec_use_netif
= !!(*(int *)data
);
3295 lck_rw_unlock_exclusive(&pcb
->ipsec_pcb_lock
);
3298 case IPSEC_OPT_SLOT_SIZE
: {
3299 if (len
!= sizeof(u_int32_t
)) {
3303 if (pcb
->ipsec_ifp
!= NULL
) {
3304 // Only can set before connecting
3308 u_int32_t slot_size
= *(u_int32_t
*)data
;
3309 if (slot_size
< IPSEC_IF_MIN_SLOT_SIZE
||
3310 slot_size
> IPSEC_IF_MAX_SLOT_SIZE
) {
3313 pcb
->ipsec_slot_size
= slot_size
;
3314 if (if_ipsec_debug
!= 0) {
3315 printf("%s: IPSEC_OPT_SLOT_SIZE %u\n", __func__
, slot_size
);
3319 case IPSEC_OPT_NETIF_RING_SIZE
: {
3320 if (len
!= sizeof(u_int32_t
)) {
3324 if (pcb
->ipsec_ifp
!= NULL
) {
3325 // Only can set before connecting
3329 u_int32_t ring_size
= *(u_int32_t
*)data
;
3330 if (ring_size
< IPSEC_IF_MIN_RING_SIZE
||
3331 ring_size
> IPSEC_IF_MAX_RING_SIZE
) {
3334 pcb
->ipsec_netif_ring_size
= ring_size
;
3335 if (if_ipsec_debug
!= 0) {
3336 printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n", __func__
, ring_size
);
3340 case IPSEC_OPT_TX_FSW_RING_SIZE
: {
3341 if (len
!= sizeof(u_int32_t
)) {
3345 if (pcb
->ipsec_ifp
!= NULL
) {
3346 // Only can set before connecting
3350 u_int32_t ring_size
= *(u_int32_t
*)data
;
3351 if (ring_size
< IPSEC_IF_MIN_RING_SIZE
||
3352 ring_size
> IPSEC_IF_MAX_RING_SIZE
) {
3355 pcb
->ipsec_tx_fsw_ring_size
= ring_size
;
3356 if (if_ipsec_debug
!= 0) {
3357 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__
, ring_size
);
3361 case IPSEC_OPT_RX_FSW_RING_SIZE
: {
3362 if (len
!= sizeof(u_int32_t
)) {
3366 if (pcb
->ipsec_ifp
!= NULL
) {
3367 // Only can set before connecting
3371 u_int32_t ring_size
= *(u_int32_t
*)data
;
3372 if (ring_size
< IPSEC_IF_MIN_RING_SIZE
||
3373 ring_size
> IPSEC_IF_MAX_RING_SIZE
) {
3376 pcb
->ipsec_rx_fsw_ring_size
= ring_size
;
3377 if (if_ipsec_debug
!= 0) {
3378 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__
, ring_size
);
3382 case IPSEC_OPT_KPIPE_TX_RING_SIZE
: {
3383 if (len
!= sizeof(u_int32_t
)) {
3387 if (pcb
->ipsec_ifp
!= NULL
) {
3388 // Only can set before connecting
3392 u_int32_t ring_size
= *(u_int32_t
*)data
;
3393 if (ring_size
< IPSEC_IF_MIN_RING_SIZE
||
3394 ring_size
> IPSEC_IF_MAX_RING_SIZE
) {
3397 pcb
->ipsec_kpipe_tx_ring_size
= ring_size
;
3398 if (if_ipsec_debug
!= 0) {
3399 printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n", __func__
, ring_size
);
3403 case IPSEC_OPT_KPIPE_RX_RING_SIZE
: {
3404 if (len
!= sizeof(u_int32_t
)) {
3408 if (pcb
->ipsec_ifp
!= NULL
) {
3409 // Only can set before connecting
3413 u_int32_t ring_size
= *(u_int32_t
*)data
;
3414 if (ring_size
< IPSEC_IF_MIN_RING_SIZE
||
3415 ring_size
> IPSEC_IF_MAX_RING_SIZE
) {
3418 pcb
->ipsec_kpipe_rx_ring_size
= ring_size
;
3419 if (if_ipsec_debug
!= 0) {
3420 printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n", __func__
, ring_size
);
3425 #endif // IPSEC_NEXUS
3428 result
= ENOPROTOOPT
;
3437 ipsec_ctl_getopt(__unused kern_ctl_ref kctlref
,
3438 __unused u_int32_t unit
,
3445 struct ipsec_pcb
*pcb
= unitinfo
;
3451 case IPSEC_OPT_FLAGS
: {
3452 if (*len
!= sizeof(u_int32_t
)) {
3455 *(u_int32_t
*)data
= pcb
->ipsec_external_flags
;
3460 case IPSEC_OPT_EXT_IFDATA_STATS
: {
3461 if (*len
!= sizeof(int)) {
3464 *(int *)data
= (pcb
->ipsec_ext_ifdata_stats
) ? 1 : 0;
3469 case IPSEC_OPT_IFNAME
: {
3470 if (*len
< MIN(strlen(pcb
->ipsec_if_xname
) + 1, sizeof(pcb
->ipsec_if_xname
))) {
3473 if (pcb
->ipsec_ifp
== NULL
) {
3474 // Only can get after connecting
3478 *len
= scnprintf(data
, *len
, "%s", pcb
->ipsec_if_xname
) + 1;
3483 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS
: {
3484 if (*len
!= sizeof(int)) {
3487 *(int *)data
= so_svc2tc(pcb
->ipsec_output_service_class
);
3494 case IPSEC_OPT_ENABLE_CHANNEL
: {
3495 if (*len
!= sizeof(int)) {
3498 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
3499 *(int *)data
= pcb
->ipsec_kpipe_count
;
3500 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
3505 case IPSEC_OPT_CHANNEL_BIND_PID
: {
3506 if (*len
!= sizeof(pid_t
)) {
3509 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
3510 *(pid_t
*)data
= pcb
->ipsec_kpipe_pid
;
3511 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
3516 case IPSEC_OPT_ENABLE_FLOWSWITCH
: {
3517 if (*len
!= sizeof(int)) {
3520 *(int *)data
= if_check_netagent(pcb
->ipsec_ifp
, pcb
->ipsec_nx
.fsw_agent
);
3525 case IPSEC_OPT_ENABLE_NETIF
: {
3526 if (*len
!= sizeof(int)) {
3529 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
3530 *(int *)data
= !!pcb
->ipsec_use_netif
;
3531 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
3536 case IPSEC_OPT_GET_CHANNEL_UUID
: {
3537 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
3538 if (!ipsec_flag_isset(pcb
, IPSEC_FLAGS_KPIPE_ALLOCATED
)) {
3540 } else if (*len
!= sizeof(uuid_t
) * pcb
->ipsec_kpipe_count
) {
3543 for (unsigned int i
= 0; i
< pcb
->ipsec_kpipe_count
; i
++) {
3544 uuid_copy(((uuid_t
*)data
)[i
], pcb
->ipsec_kpipe_uuid
[i
]);
3547 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
3551 case IPSEC_OPT_INPUT_FRAG_SIZE
: {
3552 if (*len
!= sizeof(u_int32_t
)) {
3555 *(u_int32_t
*)data
= pcb
->ipsec_input_frag_size
;
3559 case IPSEC_OPT_SLOT_SIZE
: {
3560 if (*len
!= sizeof(u_int32_t
)) {
3563 *(u_int32_t
*)data
= pcb
->ipsec_slot_size
;
3567 case IPSEC_OPT_NETIF_RING_SIZE
: {
3568 if (*len
!= sizeof(u_int32_t
)) {
3571 *(u_int32_t
*)data
= pcb
->ipsec_netif_ring_size
;
3575 case IPSEC_OPT_TX_FSW_RING_SIZE
: {
3576 if (*len
!= sizeof(u_int32_t
)) {
3579 *(u_int32_t
*)data
= pcb
->ipsec_tx_fsw_ring_size
;
3583 case IPSEC_OPT_RX_FSW_RING_SIZE
: {
3584 if (*len
!= sizeof(u_int32_t
)) {
3587 *(u_int32_t
*)data
= pcb
->ipsec_rx_fsw_ring_size
;
3591 case IPSEC_OPT_KPIPE_TX_RING_SIZE
: {
3592 if (*len
!= sizeof(u_int32_t
)) {
3595 *(u_int32_t
*)data
= pcb
->ipsec_kpipe_tx_ring_size
;
3599 case IPSEC_OPT_KPIPE_RX_RING_SIZE
: {
3600 if (*len
!= sizeof(u_int32_t
)) {
3603 *(u_int32_t
*)data
= pcb
->ipsec_kpipe_rx_ring_size
;
3608 #endif // IPSEC_NEXUS
3611 result
= ENOPROTOOPT
;
3619 /* Network Interface functions */
3621 ipsec_output(ifnet_t interface
,
3624 struct ipsec_pcb
*pcb
= ifnet_softc(interface
);
3625 struct ipsec_output_state ipsec_state
;
3627 struct route_in6 ro6
;
3629 struct ip
*ip
= NULL
;
3630 struct ip6_hdr
*ip6
= NULL
;
3631 struct ip_out_args ipoa
;
3632 struct ip6_out_args ip6oa
;
3634 u_int ip_version
= 0;
3636 struct flowadv
*adv
= NULL
;
3638 // Make sure this packet isn't looping through the interface
3639 if (necp_get_last_interface_index_from_packet(data
) == interface
->if_index
) {
3641 goto ipsec_output_err
;
3644 // Mark the interface so NECP can evaluate tunnel policy
3645 necp_mark_packet_from_interface(data
, interface
);
3647 if (data
->m_len
< sizeof(*ip
)) {
3648 os_log_error(OS_LOG_DEFAULT
, "ipsec_output: first mbuf length shorter than IP header length: %d.\n", data
->m_len
);
3649 IPSEC_STAT_INCREMENT(ipsecstat
.out_inval
);
3651 goto ipsec_output_err
;
3654 ip
= mtod(data
, struct ip
*);
3655 ip_version
= ip
->ip_v
;
3657 switch (ip_version
) {
3659 u_int8_t ip_hlen
= 0;
3661 ip_hlen
= _IP_VHL_HL(ip
->ip_vhl
) << 2;
3663 ip_hlen
= ip
->ip_hl
<< 2;
3665 if (ip_hlen
< sizeof(*ip
)) {
3666 os_log_error(OS_LOG_DEFAULT
, "ipsec_output: Bad ip header length %d.\n", ip_hlen
);
3667 IPSEC_STAT_INCREMENT(ipsecstat
.out_inval
);
3669 goto ipsec_output_err
;
3672 if (!pcb
->ipsec_use_netif
)
3673 #endif // IPSEC_NEXUS
3676 bpf_tap_out(pcb
->ipsec_ifp
, DLT_NULL
, data
, &af
, sizeof(af
));
3679 /* Apply encryption */
3680 memset(&ipsec_state
, 0, sizeof(ipsec_state
));
3681 ipsec_state
.m
= data
;
3682 ipsec_state
.dst
= (struct sockaddr
*)&ip
->ip_dst
;
3683 memset(&ipsec_state
.ro
, 0, sizeof(ipsec_state
.ro
));
3685 error
= ipsec4_interface_output(&ipsec_state
, interface
);
3686 /* Tunneled in IPv6 - packet is gone */
3687 if (error
== 0 && ipsec_state
.tunneled
== 6) {
3691 data
= ipsec_state
.m
;
3692 if (error
|| data
== NULL
) {
3694 os_log_error(OS_LOG_DEFAULT
, "ipsec_output: ipsec4_output error %d.\n", error
);
3696 goto ipsec_output_err
;
3699 /* Set traffic class, set flow */
3700 m_set_service_class(data
, pcb
->ipsec_output_service_class
);
3701 data
->m_pkthdr
.pkt_flowsrc
= FLOWSRC_IFNET
;
3702 data
->m_pkthdr
.pkt_flowid
= interface
->if_flowhash
;
3703 data
->m_pkthdr
.pkt_proto
= ip
->ip_p
;
3704 data
->m_pkthdr
.pkt_flags
= (PKTF_FLOW_ID
| PKTF_FLOW_ADV
| PKTF_FLOW_LOCALSRC
);
3706 /* Flip endian-ness for ip_output */
3707 ip
= mtod(data
, struct ip
*);
3711 /* Increment statistics */
3712 length
= mbuf_pkthdr_len(data
);
3713 ifnet_stat_increment_out(interface
, 1, length
, 0);
3715 /* Send to ip_output */
3716 memset(&ro
, 0, sizeof(ro
));
3718 flags
= (IP_OUTARGS
| /* Passing out args to specify interface */
3719 IP_NOIPSEC
); /* To ensure the packet doesn't go through ipsec twice */
3721 memset(&ipoa
, 0, sizeof(ipoa
));
3722 ipoa
.ipoa_flowadv
.code
= 0;
3723 ipoa
.ipoa_flags
= IPOAF_SELECT_SRCIF
| IPOAF_BOUND_SRCADDR
;
3724 if (ipsec_state
.outgoing_if
) {
3725 ipoa
.ipoa_boundif
= ipsec_state
.outgoing_if
;
3726 ipoa
.ipoa_flags
|= IPOAF_BOUND_IF
;
3728 ipsec_set_ipoa_for_interface(pcb
->ipsec_ifp
, &ipoa
);
3730 adv
= &ipoa
.ipoa_flowadv
;
3732 (void)ip_output(data
, NULL
, &ro
, flags
, NULL
, &ipoa
);
3735 if (adv
->code
== FADV_FLOW_CONTROLLED
|| adv
->code
== FADV_SUSPENDED
) {
3737 ifnet_disable_output(interface
);
3743 if (data
->m_len
< sizeof(*ip6
)) {
3744 os_log_error(OS_LOG_DEFAULT
, "ipsec_output: first mbuf length shorter than IPv6 header length: %d.\n", data
->m_len
);
3745 IPSEC_STAT_INCREMENT(ipsec6stat
.out_inval
);
3747 goto ipsec_output_err
;
3750 if (!pcb
->ipsec_use_netif
)
3751 #endif // IPSEC_NEXUS
3754 bpf_tap_out(pcb
->ipsec_ifp
, DLT_NULL
, data
, &af
, sizeof(af
));
3757 data
= ipsec6_splithdr(data
);
3759 os_log_error(OS_LOG_DEFAULT
, "ipsec_output: ipsec6_splithdr returned NULL\n");
3760 goto ipsec_output_err
;
3763 ip6
= mtod(data
, struct ip6_hdr
*);
3765 memset(&ipsec_state
, 0, sizeof(ipsec_state
));
3766 ipsec_state
.m
= data
;
3767 ipsec_state
.dst
= (struct sockaddr
*)&ip6
->ip6_dst
;
3768 memset(&ipsec_state
.ro
, 0, sizeof(ipsec_state
.ro
));
3770 error
= ipsec6_interface_output(&ipsec_state
, interface
, &ip6
->ip6_nxt
, ipsec_state
.m
);
3771 if (error
== 0 && ipsec_state
.tunneled
== 4) { /* tunneled in IPv4 - packet is gone */
3774 data
= ipsec_state
.m
;
3775 if (error
|| data
== NULL
) {
3777 os_log_error(OS_LOG_DEFAULT
, "ipsec_output: ipsec6_output error %d\n", error
);
3779 goto ipsec_output_err
;
3782 /* Set traffic class, set flow */
3783 m_set_service_class(data
, pcb
->ipsec_output_service_class
);
3784 data
->m_pkthdr
.pkt_flowsrc
= FLOWSRC_IFNET
;
3785 data
->m_pkthdr
.pkt_flowid
= interface
->if_flowhash
;
3786 data
->m_pkthdr
.pkt_proto
= ip6
->ip6_nxt
;
3787 data
->m_pkthdr
.pkt_flags
= (PKTF_FLOW_ID
| PKTF_FLOW_ADV
| PKTF_FLOW_LOCALSRC
);
3789 /* Increment statistics */
3790 length
= mbuf_pkthdr_len(data
);
3791 ifnet_stat_increment_out(interface
, 1, length
, 0);
3793 /* Send to ip6_output */
3794 memset(&ro6
, 0, sizeof(ro6
));
3796 flags
= IPV6_OUTARGS
;
3798 memset(&ip6oa
, 0, sizeof(ip6oa
));
3799 ip6oa
.ip6oa_flowadv
.code
= 0;
3800 ip6oa
.ip6oa_flags
= IP6OAF_SELECT_SRCIF
| IP6OAF_BOUND_SRCADDR
;
3801 if (ipsec_state
.outgoing_if
) {
3802 ip6oa
.ip6oa_boundif
= ipsec_state
.outgoing_if
;
3803 ip6oa
.ip6oa_flags
|= IP6OAF_BOUND_IF
;
3805 ipsec_set_ip6oa_for_interface(pcb
->ipsec_ifp
, &ip6oa
);
3807 adv
= &ip6oa
.ip6oa_flowadv
;
3809 (void) ip6_output(data
, NULL
, &ro6
, flags
, NULL
, NULL
, &ip6oa
);
3812 if (adv
->code
== FADV_FLOW_CONTROLLED
|| adv
->code
== FADV_SUSPENDED
) {
3814 ifnet_disable_output(interface
);
3820 os_log_error(OS_LOG_DEFAULT
, "ipsec_output: Received unknown packet version %d.\n", ip_version
);
3822 goto ipsec_output_err
;
3837 ipsec_start(ifnet_t interface
)
3840 struct ipsec_pcb
*pcb
= ifnet_softc(interface
);
3842 VERIFY(pcb
!= NULL
);
3844 if (ifnet_dequeue(interface
, &data
) != 0) {
3847 if (ipsec_output(interface
, data
) != 0) {
3853 /* Network Interface functions */
3855 ipsec_demux(__unused ifnet_t interface
,
3857 __unused
char *frame_header
,
3858 protocol_family_t
*protocol
)
3863 while (data
!= NULL
&& mbuf_len(data
) < 1) {
3864 data
= mbuf_next(data
);
3871 ip
= mtod(data
, struct ip
*);
3872 ip_version
= ip
->ip_v
;
3874 switch (ip_version
) {
3876 *protocol
= PF_INET
;
3879 *protocol
= PF_INET6
;
3889 ipsec_add_proto(__unused ifnet_t interface
,
3890 protocol_family_t protocol
,
3891 __unused
const struct ifnet_demux_desc
*demux_array
,
3892 __unused u_int32_t demux_count
)
3907 ipsec_del_proto(__unused ifnet_t interface
,
3908 __unused protocol_family_t protocol
)
3914 ipsec_ioctl(ifnet_t interface
,
3919 struct ipsec_pcb
*pcb
= ifnet_softc(interface
);
3926 if (pcb
->ipsec_use_netif
) {
3927 // Make sure we can fit packets in the channel buffers
3928 if (((uint64_t)((struct ifreq
*)data
)->ifr_mtu
) > pcb
->ipsec_slot_size
) {
3931 ifnet_set_mtu(interface
, (uint32_t)((struct ifreq
*)data
)->ifr_mtu
);
3934 #endif // IPSEC_NEXUS
3936 ifnet_set_mtu(interface
, ((struct ifreq
*)data
)->ifr_mtu
);
3942 /* ifioctl() takes care of it */
3945 case SIOCSIFSUBFAMILY
: {
3948 subfamily
= ((struct ifreq
*)data
)->ifr_type
.ift_subfamily
;
3949 switch (subfamily
) {
3950 case IFRTYPE_SUBFAMILY_BLUETOOTH
:
3951 interface
->if_subfamily
= IFNET_SUBFAMILY_BLUETOOTH
;
3953 case IFRTYPE_SUBFAMILY_WIFI
:
3954 interface
->if_subfamily
= IFNET_SUBFAMILY_WIFI
;
3956 case IFRTYPE_SUBFAMILY_QUICKRELAY
:
3957 interface
->if_subfamily
= IFNET_SUBFAMILY_QUICKRELAY
;
3959 case IFRTYPE_SUBFAMILY_DEFAULT
:
3960 interface
->if_subfamily
= IFNET_SUBFAMILY_DEFAULT
;
3970 result
= EOPNOTSUPP
;
3977 ipsec_detached(ifnet_t interface
)
3979 struct ipsec_pcb
*pcb
= ifnet_softc(interface
);
3981 (void)ifnet_release(interface
);
3982 lck_mtx_lock(&ipsec_lock
);
3983 ipsec_free_pcb(pcb
, true);
3984 (void)ifnet_dispose(interface
);
3985 lck_mtx_unlock(&ipsec_lock
);
3988 /* Protocol Handlers */
3991 ipsec_proto_input(ifnet_t interface
,
3992 protocol_family_t protocol
,
3994 __unused
char *frame_header
)
3996 mbuf_pkthdr_setrcvif(m
, interface
);
3999 struct ipsec_pcb
*pcb
= ifnet_softc(interface
);
4000 if (!pcb
->ipsec_use_netif
)
4001 #endif // IPSEC_NEXUS
4004 struct ip
*ip
= mtod(m
, struct ip
*);
4005 if (ip
->ip_v
== 4) {
4007 } else if (ip
->ip_v
== 6) {
4010 bpf_tap_in(interface
, DLT_NULL
, m
, &af
, sizeof(af
));
4011 pktap_input(interface
, protocol
, m
, NULL
);
4014 int32_t pktlen
= m
->m_pkthdr
.len
;
4015 if (proto_input(protocol
, m
) != 0) {
4016 ifnet_stat_increment_in(interface
, 0, 0, 1);
4019 ifnet_stat_increment_in(interface
, 1, pktlen
, 0);
4026 ipsec_proto_pre_output(__unused ifnet_t interface
,
4027 protocol_family_t protocol
,
4028 __unused mbuf_t
*packet
,
4029 __unused
const struct sockaddr
*dest
,
4030 __unused
void *route
,
4031 __unused
char *frame_type
,
4032 __unused
char *link_layer_dest
)
4034 *(protocol_family_t
*)(void *)frame_type
= protocol
;
4039 ipsec_attach_proto(ifnet_t interface
,
4040 protocol_family_t protocol
)
4042 struct ifnet_attach_proto_param proto
;
4045 bzero(&proto
, sizeof(proto
));
4046 proto
.input
= ipsec_proto_input
;
4047 proto
.pre_output
= ipsec_proto_pre_output
;
4049 result
= ifnet_attach_protocol(interface
, protocol
, &proto
);
4050 if (result
!= 0 && result
!= EEXIST
) {
4051 os_log_error(OS_LOG_DEFAULT
, "ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
4059 ipsec_inject_inbound_packet(ifnet_t interface
,
4063 struct ipsec_pcb
*pcb
= ifnet_softc(interface
);
4065 if (pcb
->ipsec_use_netif
) {
4066 if (!ipsec_data_move_begin(pcb
)) {
4067 os_log_info(OS_LOG_DEFAULT
, "%s: data path stopped for %s\n", __func__
,
4068 if_name(pcb
->ipsec_ifp
));
4072 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
4074 lck_mtx_lock(&pcb
->ipsec_input_chain_lock
);
4076 if (pcb
->ipsec_input_chain_count
> (u_int32_t
)if_ipsec_max_pending_input
) {
4077 lck_mtx_unlock(&pcb
->ipsec_input_chain_lock
);
4078 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
4079 ipsec_data_move_end(pcb
);
4083 if (pcb
->ipsec_input_chain
!= NULL
) {
4084 pcb
->ipsec_input_chain_last
->m_nextpkt
= packet
;
4086 pcb
->ipsec_input_chain
= packet
;
4088 pcb
->ipsec_input_chain_count
++;
4089 while (packet
->m_nextpkt
) {
4090 VERIFY(packet
!= packet
->m_nextpkt
);
4091 packet
= packet
->m_nextpkt
;
4092 pcb
->ipsec_input_chain_count
++;
4094 pcb
->ipsec_input_chain_last
= packet
;
4095 lck_mtx_unlock(&pcb
->ipsec_input_chain_lock
);
4097 kern_channel_ring_t rx_ring
= pcb
->ipsec_netif_rxring
[0];
4098 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
4100 if (rx_ring
!= NULL
) {
4101 kern_channel_notify(rx_ring
, 0);
4104 ipsec_data_move_end(pcb
);
4107 #endif // IPSEC_NEXUS
4110 protocol_family_t protocol
;
4111 if ((error
= ipsec_demux(interface
, packet
, NULL
, &protocol
)) != 0) {
4115 return ipsec_proto_input(interface
, protocol
, packet
, NULL
);
4120 ipsec_set_pkthdr_for_interface(ifnet_t interface
, mbuf_t packet
, int family
)
4122 if (packet
!= NULL
&& interface
!= NULL
) {
4123 struct ipsec_pcb
*pcb
= ifnet_softc(interface
);
4125 /* Set traffic class, set flow */
4126 m_set_service_class(packet
, pcb
->ipsec_output_service_class
);
4127 packet
->m_pkthdr
.pkt_flowsrc
= FLOWSRC_IFNET
;
4128 packet
->m_pkthdr
.pkt_flowid
= interface
->if_flowhash
;
4129 if (family
== AF_INET
) {
4130 struct ip
*ip
= mtod(packet
, struct ip
*);
4131 packet
->m_pkthdr
.pkt_proto
= ip
->ip_p
;
4132 } else if (family
== AF_INET6
) {
4133 struct ip6_hdr
*ip6
= mtod(packet
, struct ip6_hdr
*);
4134 packet
->m_pkthdr
.pkt_proto
= ip6
->ip6_nxt
;
4136 packet
->m_pkthdr
.pkt_flags
= (PKTF_FLOW_ID
| PKTF_FLOW_ADV
| PKTF_FLOW_LOCALSRC
);
4142 ipsec_set_ipoa_for_interface(ifnet_t interface
, struct ip_out_args
*ipoa
)
4144 struct ipsec_pcb
*pcb
;
4146 if (interface
== NULL
|| ipoa
== NULL
) {
4149 pcb
= ifnet_softc(interface
);
4151 if (net_qos_policy_restricted
== 0) {
4152 ipoa
->ipoa_flags
|= IPOAF_QOSMARKING_ALLOWED
;
4153 ipoa
->ipoa_sotc
= so_svc2tc(pcb
->ipsec_output_service_class
);
4154 } else if (pcb
->ipsec_output_service_class
!= MBUF_SC_VO
||
4155 net_qos_policy_restrict_avapps
!= 0) {
4156 ipoa
->ipoa_flags
&= ~IPOAF_QOSMARKING_ALLOWED
;
4158 ipoa
->ipoa_flags
|= IP6OAF_QOSMARKING_ALLOWED
;
4159 ipoa
->ipoa_sotc
= SO_TC_VO
;
4164 ipsec_set_ip6oa_for_interface(ifnet_t interface
, struct ip6_out_args
*ip6oa
)
4166 struct ipsec_pcb
*pcb
;
4168 if (interface
== NULL
|| ip6oa
== NULL
) {
4171 pcb
= ifnet_softc(interface
);
4173 if (net_qos_policy_restricted
== 0) {
4174 ip6oa
->ip6oa_flags
|= IPOAF_QOSMARKING_ALLOWED
;
4175 ip6oa
->ip6oa_sotc
= so_svc2tc(pcb
->ipsec_output_service_class
);
4176 } else if (pcb
->ipsec_output_service_class
!= MBUF_SC_VO
||
4177 net_qos_policy_restrict_avapps
!= 0) {
4178 ip6oa
->ip6oa_flags
&= ~IPOAF_QOSMARKING_ALLOWED
;
4180 ip6oa
->ip6oa_flags
|= IP6OAF_QOSMARKING_ALLOWED
;
4181 ip6oa
->ip6oa_sotc
= SO_TC_VO
;
4187 ipsec_data_move_drain(struct ipsec_pcb
*pcb
)
4189 lck_mtx_lock(&pcb
->ipsec_pcb_data_move_lock
);
4190 /* data path must already be marked as not ready */
4191 VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb
));
4192 pcb
->ipsec_pcb_drainers
++;
4193 while (pcb
->ipsec_pcb_data_move
!= 0) {
4194 (void)msleep(&(pcb
->ipsec_pcb_data_move
), &pcb
->ipsec_pcb_data_move_lock
,
4195 (PZERO
- 1), __func__
, NULL
);
4197 VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb
));
4198 VERIFY(pcb
->ipsec_pcb_drainers
> 0);
4199 pcb
->ipsec_pcb_drainers
--;
4200 lck_mtx_unlock(&pcb
->ipsec_pcb_data_move_lock
);
4204 ipsec_wait_data_move_drain(struct ipsec_pcb
*pcb
)
4207 * Mark the data path as not usable.
4209 lck_mtx_lock(&pcb
->ipsec_pcb_data_move_lock
);
4210 IPSEC_CLR_DATA_PATH_READY(pcb
);
4211 lck_mtx_unlock(&pcb
->ipsec_pcb_data_move_lock
);
4213 /* Wait until all threads in the data paths are done. */
4214 ipsec_data_move_drain(pcb
);