2 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/systm.h>
30 #include <sys/kern_control.h>
31 #include <net/kpi_protocol.h>
32 #include <net/kpi_interface.h>
33 #include <sys/socket.h>
34 #include <sys/socketvar.h>
36 #include <net/if_types.h>
38 #include <net/if_ipsec.h>
40 #include <sys/sockio.h>
41 #include <netinet/in.h>
42 #include <netinet/ip6.h>
43 #include <netinet6/in6_var.h>
44 #include <netinet6/ip6_var.h>
45 #include <sys/kauth.h>
46 #include <netinet6/ipsec.h>
47 #include <netinet6/ipsec6.h>
48 #include <netinet6/esp.h>
49 #include <netinet6/esp6.h>
50 #include <netinet/ip.h>
51 #include <net/flowadv.h>
53 #include <netkey/key.h>
54 #include <net/pktap.h>
55 #include <kern/zalloc.h>
59 extern int net_qos_policy_restricted
;
60 extern int net_qos_policy_restrict_avapps
;
61 extern unsigned int if_enable_netagent
;
63 /* Kernel Control functions */
64 static errno_t
ipsec_ctl_bind(kern_ctl_ref kctlref
, struct sockaddr_ctl
*sac
,
66 static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref
, struct sockaddr_ctl
*sac
,
68 static errno_t
ipsec_ctl_disconnect(kern_ctl_ref kctlref
, u_int32_t unit
,
70 static errno_t
ipsec_ctl_send(kern_ctl_ref kctlref
, u_int32_t unit
,
71 void *unitinfo
, mbuf_t m
, int flags
);
72 static errno_t
ipsec_ctl_getopt(kern_ctl_ref kctlref
, u_int32_t unit
, void *unitinfo
,
73 int opt
, void *data
, size_t *len
);
74 static errno_t
ipsec_ctl_setopt(kern_ctl_ref kctlref
, u_int32_t unit
, void *unitinfo
,
75 int opt
, void *data
, size_t len
);
77 /* Network Interface functions */
78 static void ipsec_start(ifnet_t interface
);
79 static errno_t
ipsec_output(ifnet_t interface
, mbuf_t data
);
80 static errno_t
ipsec_demux(ifnet_t interface
, mbuf_t data
, char *frame_header
,
81 protocol_family_t
*protocol
);
82 static errno_t
ipsec_add_proto(ifnet_t interface
, protocol_family_t protocol
,
83 const struct ifnet_demux_desc
*demux_array
,
84 u_int32_t demux_count
);
85 static errno_t
ipsec_del_proto(ifnet_t interface
, protocol_family_t protocol
);
86 static errno_t
ipsec_ioctl(ifnet_t interface
, u_long cmd
, void *data
);
87 static void ipsec_detached(ifnet_t interface
);
89 /* Protocol handlers */
90 static errno_t
ipsec_attach_proto(ifnet_t interface
, protocol_family_t proto
);
91 static errno_t
ipsec_proto_input(ifnet_t interface
, protocol_family_t protocol
,
92 mbuf_t m
, char *frame_header
);
93 static errno_t
ipsec_proto_pre_output(ifnet_t interface
, protocol_family_t protocol
,
94 mbuf_t
*packet
, const struct sockaddr
*dest
, void *route
,
95 char *frame_type
, char *link_layer_dest
);
/*
 * Module-wide state shared by every ipsec interface.
 *
 *  ipsec_kctlref       kernel control handle; filled in by ctl_register()
 *                      in ipsec_register_control() and handed back to
 *                      ctl_deregister() on the failure paths there.
 *  ipsec_family        value obtained from mbuf_tag_id_find() for
 *                      IPSEC_CONTROL_NAME; passed as the family argument
 *                      when the protocol plumbers are registered.
 *  ipsec_lck_attr,
 *  ipsec_lck_grp_attr,
 *  ipsec_lck_grp       lock attribute / group objects allocated at the end
 *                      of ipsec_register_control().
 *  ipsec_lock          global mutex initialised with the group and
 *                      attribute above.
 */
97 static kern_ctl_ref ipsec_kctlref
;
98 static u_int32_t ipsec_family
;
99 static lck_attr_t
*ipsec_lck_attr
;
100 static lck_grp_attr_t
*ipsec_lck_grp_attr
;
101 static lck_grp_t
*ipsec_lck_grp
;
102 static lck_mtx_t ipsec_lock
;
/*
 * sysctl configuration for the ipsec interface driver.
 *
 * Declares an "ipsec" node under _net and hangs four integer knobs off it:
 * verify_interface_creation, ring_size, tx_fsw_ring_size and
 * rx_fsw_ring_size.  The three ring-size knobs go through dedicated
 * handlers (declared below, defined later in this file) rather than
 * SYSCTL_INT so that new values can be range-checked.
 */
106 SYSCTL_DECL(_net_ipsec
);
107 SYSCTL_NODE(_net
, OID_AUTO
, ipsec
, CTLFLAG_RW
| CTLFLAG_LOCKED
, 0, "IPsec");
// When non-zero, IPSEC_IF_VERIFY() below turns its argument into a VERIFY().
108 static int if_ipsec_verify_interface_creation
= 0;
109 SYSCTL_INT(_net_ipsec
, OID_AUTO
, verify_interface_creation
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &if_ipsec_verify_interface_creation
, 0, "");
// NOTE(review): this macro expands to a bare `if (...) { ... }` rather than a
// do { } while (0) wrapper, so using it as the body of an if/else would bind
// a following `else` to the macro's own `if`.
111 #define IPSEC_IF_VERIFY(_e) if (unlikely(if_ipsec_verify_interface_creation)) { VERIFY(_e); }
// Default sizes used to initialise the ring-size knobs below.
113 #define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
114 #define IPSEC_IF_DEFAULT_RING_SIZE 64
115 #define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
116 #define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
// Bounds that the ring-size sysctl handlers compare new values against.
118 #define IPSEC_IF_MIN_RING_SIZE 16
119 #define IPSEC_IF_MAX_RING_SIZE 1024
// Slot-size bounds (not referenced by the handlers in this part of the file).
121 #define IPSEC_IF_MIN_SLOT_SIZE 1024
122 #define IPSEC_IF_MAX_SLOT_SIZE 4096
// Forward declarations of the three range-checking handlers.
124 static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
;
125 static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
;
126 static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
;
// Backing storage for the knobs; only written by the handlers above.
128 static int if_ipsec_ring_size
= IPSEC_IF_DEFAULT_RING_SIZE
;
129 static int if_ipsec_tx_fsw_ring_size
= IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE
;
130 static int if_ipsec_rx_fsw_ring_size
= IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE
;
132 SYSCTL_PROC(_net_ipsec
, OID_AUTO
, ring_size
, CTLTYPE_INT
| CTLFLAG_LOCKED
| CTLFLAG_RW
,
133 &if_ipsec_ring_size
, IPSEC_IF_DEFAULT_RING_SIZE
, &sysctl_if_ipsec_ring_size
, "I", "");
134 SYSCTL_PROC(_net_ipsec
, OID_AUTO
, tx_fsw_ring_size
, CTLTYPE_INT
| CTLFLAG_LOCKED
| CTLFLAG_RW
,
135 &if_ipsec_tx_fsw_ring_size
, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE
, &sysctl_if_ipsec_tx_fsw_ring_size
, "I", "");
136 SYSCTL_PROC(_net_ipsec
, OID_AUTO
, rx_fsw_ring_size
, CTLTYPE_INT
| CTLFLAG_LOCKED
| CTLFLAG_RW
,
137 &if_ipsec_rx_fsw_ring_size
, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE
, &sysctl_if_ipsec_rx_fsw_ring_size
, "I", "");
// Forward declaration; the definition appears further down in this file and
// is called from ipsec_register_control().
140 ipsec_register_nexus(void);
142 typedef struct ipsec_nx
{
152 static nexus_controller_t ipsec_ncd
;
153 static int ipsec_ncd_refcount
;
154 static uuid_t ipsec_kpipe_uuid
;
156 #endif // IPSEC_NEXUS
158 /* Control block allocated for each kernel control connection */
// Linkage on the global ipsec_head list.
160 TAILQ_ENTRY(ipsec_pcb
) ipsec_chain
;
161 kern_ctl_ref ipsec_ctlref
;
// ipsec_unit is reset to 0 when the control disconnects; see the check in
// ipsec_interface_isvalid().
163 u_int32_t ipsec_unit
;
164 u_int32_t ipsec_unique_id
;
165 u_int32_t ipsec_flags
;
// When ipsec_frag_size_set is true, ipsec_netif_sync_rx() fragments inbound
// packets longer than ipsec_input_frag_size.
166 u_int32_t ipsec_input_frag_size
;
167 bool ipsec_frag_size_set
;
// When zero, the driver itself bumps the interface output counters
// (see ifnet_stat_increment_out() in ipsec_kpipe_sync_rx()).
168 int ipsec_ext_ifdata_stats
;
169 mbuf_svc_class_t ipsec_output_service_class
;
170 char ipsec_if_xname
[IFXNAMSIZ
];
171 char ipsec_unique_name
[IFXNAMSIZ
];
172 // PCB lock protects state fields, like ipsec_kpipe_enabled
173 decl_lck_rw_data(, ipsec_pcb_lock
);
// Singly linked (m_nextpkt) queue of inbound mbufs; ipsec_netif_sync_rx()
// dequeues from the head under ipsec_input_chain_lock.
176 lck_mtx_t ipsec_input_chain_lock
;
177 struct mbuf
* ipsec_input_chain
;
178 struct mbuf
* ipsec_input_chain_last
;
179 // Input chain lock protects the list of input mbufs
180 // The input chain lock must be taken AFTER the PCB lock if both are held
181 struct ipsec_nx ipsec_nx
;
// Kernel-pipe state: the enable flag is read under ipsec_pcb_lock by the sync
// routines; the ring pointers are set and cleared by
// ipsec_kpipe_ring_init() / ipsec_kpipe_ring_fini().
182 int ipsec_kpipe_enabled
;
183 uuid_t ipsec_kpipe_uuid
;
184 void * ipsec_kpipe_rxring
;
185 void * ipsec_kpipe_txring
;
// Netif nexus state: the nexus pointer is recorded by ipsec_netif_prepare()
// and cleared by ipsec_nexus_disconnected(); the ring pointers are managed by
// ipsec_netif_ring_init() / ipsec_netif_ring_fini().
187 kern_nexus_t ipsec_netif_nexus
;
188 void * ipsec_netif_rxring
;
189 void * ipsec_netif_txring
;
// Compared against the available slot count in ipsec_netif_tx_doorbell() to
// decide when to disable interface output.
190 uint64_t ipsec_netif_txring_size
;
// Upper bound applied to packet lengths copied out of TX slots.
192 u_int32_t ipsec_slot_size
;
193 u_int32_t ipsec_netif_ring_size
;
194 u_int32_t ipsec_tx_fsw_ring_size
;
195 u_int32_t ipsec_rx_fsw_ring_size
;
196 bool ipsec_use_netif
;
198 #endif // IPSEC_NEXUS
// Global list of control blocks; initialised with TAILQ_INIT() in
// ipsec_register_control().
201 TAILQ_HEAD(ipsec_list
, ipsec_pcb
) ipsec_head
;
// Parameters for the pcb zone created by zinit() in ipsec_register_control():
// the zone is sized for IPSEC_PCB_ZONE_MAX elements of ipsec_pcb_size bytes.
203 #define IPSEC_PCB_ZONE_MAX 32
204 #define IPSEC_PCB_ZONE_NAME "net.if_ipsec"
206 static unsigned int ipsec_pcb_size
; /* size of zone element */
207 static struct zone
*ipsec_pcb_zone
; /* zone for ipsec_pcb */
// NOTE(review): IPSECQ_MAXLEN is not referenced in the visible part of the
// file; presumably an output queue limit — confirm at its use site.
209 #define IPSECQ_MAXLEN 256
/*
 * sysctl handler for the ring_size knob under _net_ipsec.
 *
 * Operates on a local copy of if_ipsec_ring_size: sysctl_handle_int() is
 * given the copy, and the global is only overwritten once execution gets
 * past both the error / no-new-value check and the range check against
 * [IPSEC_IF_MIN_RING_SIZE, IPSEC_IF_MAX_RING_SIZE].
 *
 * NOTE(review): the bodies of the two `if` statements and the final return
 * are not visible here; presumably both return early — confirm.
 */
213 sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
215 #pragma unused(arg1, arg2)
216 int value
= if_ipsec_ring_size
;
218 int error
= sysctl_handle_int(oidp
, &value
, 0, req
);
// Nothing to store if the helper failed or no new value was supplied.
219 if (error
|| !req
->newptr
) {
// Reject values outside the supported ring-size range.
223 if (value
< IPSEC_IF_MIN_RING_SIZE
||
224 value
> IPSEC_IF_MAX_RING_SIZE
) {
228 if_ipsec_ring_size
= value
;
/*
 * sysctl handler for the tx_fsw_ring_size knob under _net_ipsec.
 *
 * Same shape as the ring_size handler: a local copy of
 * if_ipsec_tx_fsw_ring_size is passed to sysctl_handle_int(), and the
 * global is only updated after the error / no-new-value check and the
 * [IPSEC_IF_MIN_RING_SIZE, IPSEC_IF_MAX_RING_SIZE] range check.
 *
 * NOTE(review): the `if` bodies and the final return are not visible here;
 * presumably both return early — confirm.
 */
234 sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
236 #pragma unused(arg1, arg2)
237 int value
= if_ipsec_tx_fsw_ring_size
;
239 int error
= sysctl_handle_int(oidp
, &value
, 0, req
);
// Nothing to store if the helper failed or no new value was supplied.
240 if (error
|| !req
->newptr
) {
// Reject values outside the supported ring-size range.
244 if (value
< IPSEC_IF_MIN_RING_SIZE
||
245 value
> IPSEC_IF_MAX_RING_SIZE
) {
249 if_ipsec_tx_fsw_ring_size
= value
;
/*
 * sysctl handler for the rx_fsw_ring_size knob under _net_ipsec.
 *
 * Same shape as the other two ring-size handlers: a local copy of
 * if_ipsec_rx_fsw_ring_size is passed to sysctl_handle_int(), and the
 * global is only updated after the error / no-new-value check and the
 * [IPSEC_IF_MIN_RING_SIZE, IPSEC_IF_MAX_RING_SIZE] range check.
 *
 * NOTE(review): the `if` bodies and the final return are not visible here;
 * presumably both return early — confirm.
 */
255 sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
257 #pragma unused(arg1, arg2)
258 int value
= if_ipsec_rx_fsw_ring_size
;
260 int error
= sysctl_handle_int(oidp
, &value
, 0, req
);
// Nothing to store if the helper failed or no new value was supplied.
261 if (error
|| !req
->newptr
) {
// Reject values outside the supported ring-size range.
265 if (value
< IPSEC_IF_MIN_RING_SIZE
||
266 value
> IPSEC_IF_MAX_RING_SIZE
) {
270 if_ipsec_rx_fsw_ring_size
= value
;
274 #endif // IPSEC_NEXUS
/*
 * One-time registration of the ipsec kernel control and its supporting
 * state.  In order, the visible code:
 *   1. looks up a family id for IPSEC_CONTROL_NAME via mbuf_tag_id_find();
 *   2. creates the zone that ipsec_pcb structures are allocated from;
 *   3. calls ipsec_register_nexus();
 *   4. initialises the global pcb list;
 *   5. fills in a kern_ctl_reg with the ipsec_ctl_* callbacks and
 *      registers it, storing the handle in ipsec_kctlref;
 *   6. registers ipsec_attach_proto as the plumber for PF_INET and then
 *      PF_INET6, unwinding the earlier registrations if either fails;
 *   7. allocates the lock attribute/group objects and initialises
 *      ipsec_lock.
 *
 * NOTE(review): the declaration of `result`, the failure checks after
 * steps 1 and 5, and every return statement are not visible here.
 */
277 ipsec_register_control(void)
279 struct kern_ctl_reg kern_ctl
;
282 /* Find a unique value for our interface family */
283 result
= mbuf_tag_id_find(IPSEC_CONTROL_NAME
, &ipsec_family
);
285 printf("ipsec_register_control - mbuf_tag_id_find_internal failed: %d\n", result
);
// Zone sized for IPSEC_PCB_ZONE_MAX control blocks.
289 ipsec_pcb_size
= sizeof(struct ipsec_pcb
);
290 ipsec_pcb_zone
= zinit(ipsec_pcb_size
,
291 IPSEC_PCB_ZONE_MAX
* ipsec_pcb_size
,
292 0, IPSEC_PCB_ZONE_NAME
);
293 if (ipsec_pcb_zone
== NULL
) {
// NOTE(review): unlike the other failure messages in this function, this
// format string has no trailing newline.
294 printf("ipsec_register_control - zinit(ipsec_pcb) failed");
299 ipsec_register_nexus();
300 #endif // IPSEC_NEXUS
302 TAILQ_INIT(&ipsec_head
);
// Describe the kernel control: name, privilege requirement, buffer sizes and
// the callbacks implemented in this file.
304 bzero(&kern_ctl
, sizeof(kern_ctl
));
305 strlcpy(kern_ctl
.ctl_name
, IPSEC_CONTROL_NAME
, sizeof(kern_ctl
.ctl_name
));
// Explicit terminator on top of the bounded copy above.
306 kern_ctl
.ctl_name
[sizeof(kern_ctl
.ctl_name
) - 1] = 0;
307 kern_ctl
.ctl_flags
= CTL_FLAG_PRIVILEGED
; /* Require root */
308 kern_ctl
.ctl_sendsize
= 64 * 1024;
309 kern_ctl
.ctl_recvsize
= 64 * 1024;
310 kern_ctl
.ctl_bind
= ipsec_ctl_bind
;
311 kern_ctl
.ctl_connect
= ipsec_ctl_connect
;
312 kern_ctl
.ctl_disconnect
= ipsec_ctl_disconnect
;
313 kern_ctl
.ctl_send
= ipsec_ctl_send
;
314 kern_ctl
.ctl_setopt
= ipsec_ctl_setopt
;
315 kern_ctl
.ctl_getopt
= ipsec_ctl_getopt
;
317 result
= ctl_register(&kern_ctl
, &ipsec_kctlref
);
319 printf("ipsec_register_control - ctl_register failed: %d\n", result
);
323 /* Register the protocol plumbers */
324 if ((result
= proto_register_plumber(PF_INET
, ipsec_family
,
325 ipsec_attach_proto
, NULL
)) != 0) {
326 printf("ipsec_register_control - proto_register_plumber(PF_INET, %d) failed: %d\n",
327 ipsec_family
, result
);
// Undo the control registration made above.
328 ctl_deregister(ipsec_kctlref
);
332 /* Register the protocol plumbers */
333 if ((result
= proto_register_plumber(PF_INET6
, ipsec_family
,
334 ipsec_attach_proto
, NULL
)) != 0) {
// Undo both the PF_INET plumber and the control registration.
335 proto_unregister_plumber(PF_INET
, ipsec_family
);
336 ctl_deregister(ipsec_kctlref
);
337 printf("ipsec_register_control - proto_register_plumber(PF_INET6, %d) failed: %d\n",
338 ipsec_family
, result
);
// Lock infrastructure for the global ipsec_lock mutex.
342 ipsec_lck_attr
= lck_attr_alloc_init();
343 ipsec_lck_grp_attr
= lck_grp_attr_alloc_init();
344 ipsec_lck_grp
= lck_grp_alloc_init("ipsec", ipsec_lck_grp_attr
);
345 lck_mtx_init(&ipsec_lock
, ipsec_lck_grp
, ipsec_lck_attr
);
/*
 * Checks whether `interface` is still backed by a connected control block.
 *
 * The visible checks are: the interface pointer is NULL, and the pcb
 * returned by ifnet_softc() has ipsec_unit == 0 (which the comment below
 * says is the state after the control disconnects).
 *
 * NOTE(review): the outcome of each check and the final return value are
 * not visible here; a NULL check on `pcb` between the softc lookup and the
 * ipsec_unit test is presumably present — confirm.
 */
352 ipsec_interface_isvalid (ifnet_t interface
)
354 struct ipsec_pcb
*pcb
= NULL
;
356 if (interface
== NULL
)
359 pcb
= ifnet_softc(interface
);
364 /* When ctl disconnects, ipsec_unit is set to 0 */
365 if (pcb
->ipsec_unit
== 0)
/*
 * Applies the default attributes to a freshly attached (or recycled)
 * ipsec interface: MTU 1500, the UP / MULTICAST / POINTOPOINT flags, the
 * IFEF_NOAUTOIPV6LL extended flag, and zeroed interface statistics.
 *
 * NOTE(review): in this copy the block comment about the IPv6 link-local
 * address has lost its terminating line, so the ifnet_set_eflags() call
 * that follows it is swallowed by the comment as far as the compiler is
 * concerned; restore the terminator.
 * NOTE(review): the return statement is not visible here; the caller
 * ipsec_netif_prepare() returns this function's result.
 */
372 ipsec_ifnet_set_attrs(ifnet_t ifp
)
374 /* Set flags and additional information. */
375 ifnet_set_mtu(ifp
, 1500);
376 ifnet_set_flags(ifp
, IFF_UP
| IFF_MULTICAST
| IFF_POINTOPOINT
, 0xffff);
378 /* The interface must generate its own IPv6 LinkLocal address,
379 * if possible following the recommendation of RFC2472 to the 64bit interface ID
381 ifnet_set_eflags(ifp
, IFEF_NOAUTOIPV6LL
, IFEF_NOAUTOIPV6LL
);
384 /* Reset the stats in case as the interface may have been recycled */
385 struct ifnet_stats_param stats
;
386 bzero(&stats
, sizeof(struct ifnet_stats_param
));
387 ifnet_set_stat(ifp
, &stats
);
388 #endif // !IPSEC_NEXUS
/*
 * Nexus domain-provider registration.
 *
 * ipsec_nx_dom_prov holds the provider UUID.  ipsec_nxdp_init() and
 * ipsec_nxdp_fini() are the provider's init/fini callbacks; neither uses
 * its argument (it is marked __unused).  ipsec_register_nexus() bundles
 * the two callbacks into a kern_nexus_domain_provider_init and registers
 * it for NEXUS_TYPE_NET_IF under the name "com.apple.ipsec".
 *
 * NOTE(review): the callback bodies, the remaining arguments of the
 * registration call, the error check and the returns are not visible
 * here.
 */
395 static uuid_t ipsec_nx_dom_prov
;
398 ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov
)
404 ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov
)
410 ipsec_register_nexus(void)
412 const struct kern_nexus_domain_provider_init dp_init
= {
413 .nxdpi_version
= KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION
,
415 .nxdpi_init
= ipsec_nxdp_init
,
416 .nxdpi_fini
= ipsec_nxdp_fini
420 /* ipsec_nxdp_init() is called before this function returns */
421 err
= kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF
,
422 (const uint8_t *) "com.apple.ipsec",
423 &dp_init
, sizeof(dp_init
),
426 printf("%s: failed to register domain provider\n", __func__
);
/*
 * Netif prepare callback: remembers the nexus in the pcb (retrieved as the
 * nexus context) and applies the default interface attributes.
 *
 * Returns whatever ipsec_ifnet_set_attrs() returns for `ifp`.
 */
433 ipsec_netif_prepare(kern_nexus_t nexus
, ifnet_t ifp
)
435 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
// Cleared again in ipsec_nexus_disconnected().
436 pcb
->ipsec_netif_nexus
= nexus
;
437 return (ipsec_ifnet_set_attrs(ifp
));
/*
 * Nexus pre-connect callback.  Every parameter is marked unused, so the
 * callback does no per-connection work.
 *
 * NOTE(review): the return statement is not visible here.
 */
441 ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov
,
442 proc_t p
, kern_nexus_t nexus
,
443 nexus_port_t nexus_port
, kern_channel_t channel
, void **ch_ctx
)
445 #pragma unused(nxprov, p)
446 #pragma unused(nexus, nexus_port, channel, ch_ctx)
/*
 * Nexus connected callback: succeeds only while the pcb's interface is
 * still attached.
 *
 * Returns 0 when ifnet_is_attached() reports true, ENXIO otherwise.
 */
451 ipsec_nexus_connected(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
452 kern_channel_t channel
)
454 #pragma unused(nxprov, channel)
455 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
// NOTE(review): the second argument (1) presumably asks for an I/O reference
// on the interface, balanced by ifnet_decr_iorefcnt() in
// ipsec_nexus_disconnected() — confirm against ifnet_is_attached().
456 boolean_t ok
= ifnet_is_attached(pcb
->ipsec_ifp
, 1);
457 return (ok
? 0 : ENXIO
);
/*
 * Nexus pre-disconnect callback.  All three parameters are marked unused;
 * no work is visible in this callback.
 */
461 ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
462 kern_channel_t channel
)
464 #pragma unused(nxprov, nexus, channel)
/*
 * Netif pre-disconnect callback.  All three parameters are marked unused;
 * no work is visible in this callback.
 */
468 ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
469 kern_channel_t channel
)
471 #pragma unused(nxprov, nexus, channel)
/*
 * Nexus disconnected callback: forgets the netif nexus if it is the one
 * being torn down, and drops an I/O reference on the interface.
 *
 * The ifnet_decr_iorefcnt() here pairs with the ifnet_is_attached(..., 1)
 * call in ipsec_nexus_connected().
 */
475 ipsec_nexus_disconnected(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
476 kern_channel_t channel
)
478 #pragma unused(nxprov, channel)
479 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
// Only clear the pointer recorded by ipsec_netif_prepare() when it refers to
// this nexus.
480 if (pcb
->ipsec_netif_nexus
== nexus
) {
481 pcb
->ipsec_netif_nexus
= NULL
;
483 ifnet_decr_iorefcnt(pcb
->ipsec_ifp
);
/*
 * Kernel-pipe ring-init callback: records `ring` in the pcb as either the
 * kpipe RX ring or the kpipe TX ring.  Each assignment is preceded by a
 * VERIFY that the corresponding pcb slot is still NULL.
 *
 * NOTE(review): the branch choosing between the two assignments is not
 * visible here; presumably it tests is_tx_ring — confirm.  The trailing
 * parameter (named ring_ctx in the pragma below) and the return are also
 * not visible.
 */
487 ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
488 kern_channel_t channel
, kern_channel_ring_t ring
, boolean_t is_tx_ring
,
491 #pragma unused(nxprov)
492 #pragma unused(channel)
493 #pragma unused(ring_ctx)
494 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
496 VERIFY(pcb
->ipsec_kpipe_rxring
== NULL
);
497 pcb
->ipsec_kpipe_rxring
= ring
;
499 VERIFY(pcb
->ipsec_kpipe_txring
== NULL
);
500 pcb
->ipsec_kpipe_txring
= ring
;
/*
 * Kernel-pipe ring-fini callback: clears whichever pcb pointer
 * (ipsec_kpipe_rxring or ipsec_kpipe_txring) currently refers to `ring`.
 * A ring that matches neither pointer leaves the pcb untouched.
 */
506 ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
507 kern_channel_ring_t ring
)
509 #pragma unused(nxprov)
510 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
511 if (pcb
->ipsec_kpipe_rxring
== ring
) {
512 pcb
->ipsec_kpipe_rxring
= NULL
;
513 } else if (pcb
->ipsec_kpipe_txring
== ring
) {
514 pcb
->ipsec_kpipe_txring
= NULL
;
/*
 * Kernel-pipe TX sync callback.  It moves no data itself: when the kpipe
 * is enabled and the TX ring has at least one slot pending, it notifies
 * the netif RX ring so that side performs the read.
 *
 * The pcb lock is held shared while ipsec_kpipe_enabled and
 * ipsec_netif_rxring are read, and is released before
 * kern_channel_notify() is called.
 *
 * NOTE(review): the early returns after the two unlock-and-bail paths and
 * the final return are not visible here.
 */
519 ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
520 kern_channel_ring_t tx_ring
, uint32_t flags
)
522 #pragma unused(nxprov)
523 #pragma unused(flags)
524 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
526 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
527 int channel_enabled
= pcb
->ipsec_kpipe_enabled
;
528 if (!channel_enabled
) {
529 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
533 kern_channel_slot_t tx_slot
= kern_channel_get_next_slot(tx_ring
, NULL
, NULL
);
534 if (tx_slot
== NULL
) {
535 // Nothing to write, bail
536 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
540 // Signal the netif ring to read
541 kern_channel_ring_t rx_ring
= pcb
->ipsec_netif_rxring
;
542 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
// rx_ring was captured under the lock; the notify happens after the unlock.
544 if (rx_ring
!= NULL
) {
545 kern_channel_notify(rx_ring
, 0);
/*
 * Runs one outbound packet through IPsec output processing for
 * `interface`.
 *
 * Visible flow:
 *   - refuse packets whose last-interface index (per NECP) is already this
 *     interface, to avoid loops;
 *   - mark the packet as coming from this interface;
 *   - switch on the IP version read from the first header byte:
 *       IPv4 path: ipsec4_interface_output();
 *       IPv6 path: ipsec6_splithdr() then ipsec6_interface_output();
 *       anything else is logged as an unknown version.
 *   - on each path the (possibly replaced) mbuf is read back from
 *     ipsec_state.m afterwards.
 * Failures jump to the ipsec_output_err label.
 *
 * NOTE(review): the second parameter (used as `data` below), the `case`
 * labels, the body of the ipsec_output_err label and the return value are
 * not visible here.  ipsec_kpipe_sync_rx() assigns this function's result
 * back to its mbuf variable.
 */
551 ipsec_encrypt_mbuf(ifnet_t interface
,
554 struct ipsec_output_state ipsec_state
;
558 // Make sure this packet isn't looping through the interface
559 if (necp_get_last_interface_index_from_packet(data
) == interface
->if_index
) {
561 goto ipsec_output_err
;
564 // Mark the interface so NECP can evaluate tunnel policy
565 necp_mark_packet_from_interface(data
, interface
);
567 struct ip
*ip
= mtod(data
, struct ip
*);
568 u_int ip_version
= ip
->ip_v
;
570 switch (ip_version
) {
574 memset(&ipsec_state
, 0, sizeof(ipsec_state
));
575 ipsec_state
.m
= data
;
// NOTE(review): this casts the address of the header's ip_dst field to a
// struct sockaddr pointer; confirm the callee does not read it as a real
// sockaddr.
576 ipsec_state
.dst
= (struct sockaddr
*)&ip
->ip_dst
;
577 memset(&ipsec_state
.ro
, 0, sizeof(ipsec_state
.ro
));
579 error
= ipsec4_interface_output(&ipsec_state
, interface
);
580 if (error
== 0 && ipsec_state
.tunneled
== 6) {
581 // Tunneled in IPv6 - packet is gone
582 // TODO: Don't lose mbuf
587 data
= ipsec_state
.m
;
588 if (error
|| data
== NULL
) {
590 printf("ipsec_encrypt_mbuf: ipsec4_output error %d\n", error
);
592 goto ipsec_output_err
;
599 data
= ipsec6_splithdr(data
);
601 printf("ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
602 goto ipsec_output_err
;
// Re-read the header pointer: ipsec6_splithdr() returned a new `data`.
605 struct ip6_hdr
*ip6
= mtod(data
, struct ip6_hdr
*);
607 memset(&ipsec_state
, 0, sizeof(ipsec_state
));
608 ipsec_state
.m
= data
;
609 ipsec_state
.dst
= (struct sockaddr
*)&ip6
->ip6_dst
;
610 memset(&ipsec_state
.ro
, 0, sizeof(ipsec_state
.ro
));
612 error
= ipsec6_interface_output(&ipsec_state
, interface
, &ip6
->ip6_nxt
, ipsec_state
.m
);
613 if (error
== 0 && ipsec_state
.tunneled
== 4) {
614 // Tunneled in IPv4 - packet is gone
615 // TODO: Don't lose mbuf
619 data
= ipsec_state
.m
;
620 if (error
|| data
== NULL
) {
622 printf("ipsec_encrypt_mbuf: ipsec6_output error %d\n", error
);
624 goto ipsec_output_err
;
629 printf("ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version
);
631 goto ipsec_output_err
;
/*
 * Kernel-pipe RX sync callback: fills the kpipe RX ring with encrypted
 * packets produced from whatever is pending on the netif TX ring.
 *
 * Visible flow:
 *   1. Under the shared pcb lock, bail out if the kpipe is not enabled,
 *      reclaim user-released RX slots and read the available slot count.
 *   2. Look up the netif TX ring; drop the pcb lock, enter that ring with
 *      kr_enter(), re-take the lock and re-check that the ring pointer in
 *      the pcb has not changed in the meantime.
 *   3. While both an RX slot and a TX slot are available: allocate an RX
 *      packet, copy the TX packet's bytes (capped at ipsec_slot_size) into
 *      a fresh mbuf, pass it through ipsec_encrypt_mbuf(), copy the result
 *      into the RX packet's buflet, finalize it and attach it to the RX
 *      slot.
 *   4. Advance both rings, publish ring statistics, reclaim TX slots,
 *      re-enable interface output, drop the lock and notify the TX ring if
 *      any TX slot was consumed.
 *
 * NOTE(review): many control-flow lines (early returns, kr_exit calls,
 * error checks, slot bookkeeping such as the updates of rx_pslot and
 * tx_pslot, and mbuf frees) are not visible here; the inline notes below
 * describe only what can be seen.
 */
646 ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
647 kern_channel_ring_t rx_ring
, uint32_t flags
)
649 #pragma unused(nxprov)
650 #pragma unused(flags)
651 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
652 struct kern_channel_ring_stat_increment rx_ring_stats
;
654 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
656 int channel_enabled
= pcb
->ipsec_kpipe_enabled
;
657 if (!channel_enabled
) {
658 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
662 // Reclaim user-released slots
663 (void) kern_channel_reclaim(rx_ring
);
665 uint32_t avail
= kern_channel_available_slot_count(rx_ring
);
// NOTE(review): this unlock presumably sits inside a check on `avail` whose
// condition line is not visible — confirm.
667 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
671 kern_channel_ring_t tx_ring
= pcb
->ipsec_netif_txring
;
672 if (tx_ring
== NULL
) {
673 // Net-If TX ring not set up yet, nothing to read
674 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
678 struct netif_stats
*nifs
= &NX_NETIF_PRIVATE(pcb
->ipsec_netif_nexus
)->nif_stats
;
680 // Unlock ipsec before entering ring
681 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
683 (void)kr_enter(tx_ring
, TRUE
);
685 // Lock again after entering and validate
686 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
687 if (tx_ring
!= pcb
->ipsec_netif_txring
) {
688 // Ring no longer valid
689 // Unlock first, then exit ring
690 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
696 struct kern_channel_ring_stat_increment tx_ring_stats
;
697 bzero(&tx_ring_stats
, sizeof(tx_ring_stats
));
698 kern_channel_slot_t tx_pslot
= NULL
;
699 kern_channel_slot_t tx_slot
= kern_channel_get_next_slot(tx_ring
, NULL
, NULL
);
700 if (tx_slot
== NULL
) {
701 // Nothing to read, don't bother signalling
702 // Unlock first, then exit ring
703 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
708 struct kern_pbufpool
*rx_pp
= rx_ring
->ckr_pp
;
709 VERIFY(rx_pp
!= NULL
);
710 bzero(&rx_ring_stats
, sizeof(rx_ring_stats
));
711 kern_channel_slot_t rx_pslot
= NULL
;
712 kern_channel_slot_t rx_slot
= kern_channel_get_next_slot(rx_ring
, NULL
, NULL
);
// One iteration consumes one TX slot and, on success, fills one RX slot.
714 while (rx_slot
!= NULL
&& tx_slot
!= NULL
) {
719 // Allocate rx packet
720 kern_packet_t rx_ph
= 0;
721 error
= kern_pbufpool_alloc_nosleep(rx_pp
, 1, &rx_ph
);
722 if (unlikely(error
!= 0)) {
723 printf("ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
724 pcb
->ipsec_ifp
->if_xname
);
728 kern_packet_t tx_ph
= kern_channel_slot_get_packet(tx_ring
, tx_slot
);
732 tx_slot
= kern_channel_get_next_slot(tx_ring
, tx_slot
, NULL
);
// Locate the start of the TX packet's payload: buflet base plus data offset.
738 kern_buflet_t tx_buf
= kern_packet_get_next_buflet(tx_ph
, NULL
);
739 VERIFY(tx_buf
!= NULL
);
740 uint8_t *tx_baddr
= kern_buflet_get_object_address(tx_buf
);
741 VERIFY(tx_baddr
!= NULL
);
742 tx_baddr
+= kern_buflet_get_data_offset(tx_buf
);
744 bpf_tap_packet_out(pcb
->ipsec_ifp
, DLT_RAW
, tx_ph
, NULL
, 0);
// Never copy more than one slot's worth of bytes.
746 length
= MIN(kern_packet_get_data_length(tx_ph
),
747 pcb
->ipsec_slot_size
);
749 // Increment TX stats
750 tx_ring_stats
.kcrsi_slots_transferred
++;
751 tx_ring_stats
.kcrsi_bytes_transferred
+= length
;
// Copy the plaintext into a newly allocated mbuf; both calls are
// non-blocking (MBUF_DONTWAIT).
754 error
= mbuf_gethdr(MBUF_DONTWAIT
, MBUF_TYPE_HEADER
, &data
);
756 error
= mbuf_copyback(data
, 0, length
, tx_baddr
, MBUF_DONTWAIT
);
758 // Encrypt and send packet
759 data
= ipsec_encrypt_mbuf(pcb
->ipsec_ifp
, data
);
761 printf("ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb
->ipsec_ifp
->if_xname
, length
, error
);
762 STATS_INC(nifs
, NETIF_STATS_NOMEM_MBUF
);
763 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
768 printf("ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb
->ipsec_ifp
->if_xname
, error
);
769 STATS_INC(nifs
, NETIF_STATS_NOMEM_MBUF
);
770 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
773 printf("ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb
->ipsec_ifp
->if_xname
);
774 STATS_INC(nifs
, NETIF_STATS_BADLEN
);
775 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
// No mbuf came back from the steps above: release the unused RX packet.
779 printf("ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb
->ipsec_ifp
->if_xname
);
780 kern_pbufpool_free(rx_pp
, rx_ph
);
// From here on `length` is the encrypted packet's length, which must fit in
// a single RX buflet.
784 length
= mbuf_pkthdr_len(data
);
785 if (length
> rx_pp
->pp_buflet_size
) {
788 kern_pbufpool_free(rx_pp
, rx_ph
);
789 printf("ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
790 pcb
->ipsec_ifp
->if_xname
, length
, rx_pp
->pp_buflet_size
);
795 kern_buflet_t rx_buf
= kern_packet_get_next_buflet(rx_ph
, NULL
);
796 VERIFY(rx_buf
!= NULL
);
797 void *rx_baddr
= kern_buflet_get_object_address(rx_buf
);
798 VERIFY(rx_baddr
!= NULL
);
800 // Copy-in data from mbuf to buflet
801 mbuf_copydata(data
, 0, length
, (void *)rx_baddr
);
802 kern_packet_clear_flow_uuid(rx_ph
); // Zero flow id
804 // Finalize and attach the packet
805 error
= kern_buflet_set_data_offset(rx_buf
, 0);
807 error
= kern_buflet_set_data_length(rx_buf
, length
);
809 error
= kern_packet_finalize(rx_ph
);
811 error
= kern_channel_slot_attach_packet(rx_ring
, rx_slot
, rx_ph
);
814 STATS_INC(nifs
, NETIF_STATS_TXPKTS
);
815 STATS_INC(nifs
, NETIF_STATS_TXCOPY_DIRECT
);
817 rx_ring_stats
.kcrsi_slots_transferred
++;
818 rx_ring_stats
.kcrsi_bytes_transferred
+= length
;
// Interface counters are only updated here when external stats are off.
820 if (!pcb
->ipsec_ext_ifdata_stats
) {
821 ifnet_stat_increment_out(pcb
->ipsec_ifp
, 1, length
, 0);
827 rx_slot
= kern_channel_get_next_slot(rx_ring
, rx_slot
, NULL
);
// Publish progress on the RX ring, then on the TX ring.
831 kern_channel_advance_slot(rx_ring
, rx_pslot
);
832 kern_channel_increment_ring_net_stats(rx_ring
, pcb
->ipsec_ifp
, &rx_ring_stats
);
836 kern_channel_advance_slot(tx_ring
, tx_pslot
);
837 kern_channel_increment_ring_net_stats(tx_ring
, pcb
->ipsec_ifp
, &tx_ring_stats
);
838 (void)kern_channel_reclaim(tx_ring
);
841 /* always reenable output */
842 errno_t error
= ifnet_enable_output(pcb
->ipsec_ifp
);
844 printf("ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error
);
847 // Unlock first, then exit ring
848 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
// Only signal the TX ring when at least one TX slot was consumed.
850 if (tx_pslot
!= NULL
) {
851 kern_channel_notify(tx_ring
, 0);
/*
 * Netif ring-init callback: records `ring` in the pcb as either the netif
 * RX ring or the netif TX ring.  Each assignment is preceded by a VERIFY
 * that the corresponding pcb slot is still NULL.
 *
 * NOTE(review): the branch choosing between the two assignments is not
 * visible here; presumably it tests is_tx_ring — confirm.  The trailing
 * parameter (named ring_ctx in the pragma below) and the return are also
 * not visible.
 */
859 ipsec_netif_ring_init(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
860 kern_channel_t channel
, kern_channel_ring_t ring
, boolean_t is_tx_ring
,
863 #pragma unused(nxprov)
864 #pragma unused(channel)
865 #pragma unused(ring_ctx)
866 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
868 VERIFY(pcb
->ipsec_netif_rxring
== NULL
);
869 pcb
->ipsec_netif_rxring
= ring
;
871 VERIFY(pcb
->ipsec_netif_txring
== NULL
);
872 pcb
->ipsec_netif_txring
= ring
;
/*
 * Netif ring-fini callback: clears whichever pcb pointer
 * (ipsec_netif_rxring or ipsec_netif_txring) currently refers to `ring`.
 * A ring that matches neither pointer leaves the pcb untouched.
 */
878 ipsec_netif_ring_fini(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
879 kern_channel_ring_t ring
)
881 #pragma unused(nxprov)
882 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
883 if (pcb
->ipsec_netif_rxring
== ring
) {
884 pcb
->ipsec_netif_rxring
= NULL
;
885 } else if (pcb
->ipsec_netif_txring
== ring
) {
886 pcb
->ipsec_netif_txring
= NULL
;
/*
 * Evaluates NECP IP-output policy for an outbound packet taken from the
 * netif TX ring and stamps the packet with the matched policy id.
 *
 * Visible flow:
 *   - a packet that already carries an IP-level policy id
 *     (>= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) is not evaluated again;
 *   - a packet shorter than an IPv4 header is not evaluated;
 *   - otherwise the IP version selects
 *     necp_ip_output_find_policy_match() or
 *     necp_ip6_output_find_policy_match();
 *   - DROP and SOCKET_DIVERT results are treated specially (see the
 *     existing comment); other results lead to
 *     necp_mark_packet_from_ip().
 *
 * NOTE(review): the return type, the `case` labels and every return value
 * are not visible here.  The caller in ipsec_netif_sync_tx() treats a
 * false result as a failed policy check and counts the packet as dropped.
 */
891 ipsec_netif_check_policy(mbuf_t data
)
893 necp_kernel_policy_result necp_result
= 0;
894 necp_kernel_policy_result_parameter necp_result_parameter
= {};
895 uint32_t necp_matched_policy_id
= 0;
897 // This packet has been marked with IP level policy, do not mark again.
898 if (data
&& data
->m_pkthdr
.necp_mtag
.necp_policy_id
>= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP
) {
// Need at least an IPv4 header's worth of bytes to read the version field.
902 size_t length
= mbuf_pkthdr_len(data
);
903 if (length
< sizeof(struct ip
)) {
907 struct ip
*ip
= mtod(data
, struct ip
*);
908 u_int ip_version
= ip
->ip_v
;
909 switch (ip_version
) {
911 necp_matched_policy_id
= necp_ip_output_find_policy_match(data
, 0, NULL
,
912 &necp_result
, &necp_result_parameter
);
916 necp_matched_policy_id
= necp_ip6_output_find_policy_match(data
, 0, NULL
,
917 &necp_result
, &necp_result_parameter
);
925 if (necp_result
== NECP_KERNEL_POLICY_RESULT_DROP
||
926 necp_result
== NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT
) {
927 /* Drop and flow divert packets should be blocked at the IP layer */
931 necp_mark_packet_from_ip(data
, necp_matched_policy_id
);
/*
 * Netif TX sync callback: drains the netif TX ring.
 *
 * Two modes are visible:
 *   - kpipe enabled: no data is moved here; the kpipe RX ring is notified
 *     so that ipsec_kpipe_sync_rx() pulls the packets.
 *   - kpipe disabled: each TX packet is copied into a new mbuf (capped at
 *     ipsec_slot_size bytes), tagged with the packet's policy id, checked
 *     by ipsec_netif_check_policy() and handed to ipsec_output().
 * Afterwards the ring is advanced, ring statistics are published and
 * completed slots are reclaimed.
 *
 * The pcb lock is held shared for the duration, and is released before
 * kern_channel_notify() on the kpipe path.
 *
 * NOTE(review): early returns, error checks, the updates of tx_pslot,
 * mbuf frees and the final return are not visible here.
 */
936 ipsec_netif_sync_tx(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
937 kern_channel_ring_t tx_ring
, uint32_t flags
)
939 #pragma unused(nxprov)
940 #pragma unused(flags)
941 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
943 struct netif_stats
*nifs
= &NX_NETIF_PRIVATE(nexus
)->nif_stats
;
945 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
947 struct kern_channel_ring_stat_increment tx_ring_stats
;
948 bzero(&tx_ring_stats
, sizeof(tx_ring_stats
));
949 kern_channel_slot_t tx_pslot
= NULL
;
950 kern_channel_slot_t tx_slot
= kern_channel_get_next_slot(tx_ring
, NULL
, NULL
);
952 STATS_INC(nifs
, NETIF_STATS_TXSYNC
);
954 if (tx_slot
== NULL
) {
955 // Nothing to write, don't bother signalling
956 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
// kpipe mode: hand the work to the kernel pipe's RX side.
960 if (pcb
->ipsec_kpipe_enabled
) {
961 kern_channel_ring_t rx_ring
= pcb
->ipsec_kpipe_rxring
;
962 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
964 // Signal the kernel pipe ring to read
965 if (rx_ring
!= NULL
) {
966 kern_channel_notify(rx_ring
, 0);
971 // If we're here, we're injecting into the BSD stack
972 while (tx_slot
!= NULL
) {
976 kern_packet_t tx_ph
= kern_channel_slot_get_packet(tx_ring
, tx_slot
);
980 tx_slot
= kern_channel_get_next_slot(tx_ring
, tx_slot
, NULL
);
// Locate the start of the TX packet's payload: buflet base plus data offset.
986 kern_buflet_t tx_buf
= kern_packet_get_next_buflet(tx_ph
, NULL
);
987 VERIFY(tx_buf
!= NULL
);
988 uint8_t *tx_baddr
= kern_buflet_get_object_address(tx_buf
);
989 VERIFY(tx_baddr
!= 0);
990 tx_baddr
+= kern_buflet_get_data_offset(tx_buf
);
992 bpf_tap_packet_out(pcb
->ipsec_ifp
, DLT_RAW
, tx_ph
, NULL
, 0);
// Never copy more than one slot's worth of bytes.
994 length
= MIN(kern_packet_get_data_length(tx_ph
),
995 pcb
->ipsec_slot_size
);
// Copy the packet into a newly allocated mbuf; both calls are non-blocking
// (MBUF_DONTWAIT).
998 errno_t error
= mbuf_gethdr(MBUF_DONTWAIT
, MBUF_TYPE_HEADER
, &data
);
1000 error
= mbuf_copyback(data
, 0, length
, tx_baddr
, MBUF_DONTWAIT
);
1002 // Mark packet from policy
1003 uint32_t policy_id
= kern_packet_get_policy_id(tx_ph
);
1004 necp_mark_packet_from_ip(data
, policy_id
);
1006 // Check policy with NECP
1007 if (!ipsec_netif_check_policy(data
)) {
1008 printf("ipsec_netif_sync_tx %s - failed policy check\n", pcb
->ipsec_ifp
->if_xname
);
1009 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1013 // Send through encryption
1014 error
= ipsec_output(pcb
->ipsec_ifp
, data
);
1016 printf("ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb
->ipsec_ifp
->if_xname
, error
);
1020 printf("ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb
->ipsec_ifp
->if_xname
, length
, error
);
1021 STATS_INC(nifs
, NETIF_STATS_NOMEM_MBUF
);
1022 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1027 printf("ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb
->ipsec_ifp
->if_xname
, error
);
1028 STATS_INC(nifs
, NETIF_STATS_NOMEM_MBUF
);
1029 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1032 printf("ipsec_netif_sync_tx %s - 0 length packet\n", pcb
->ipsec_ifp
->if_xname
);
1033 STATS_INC(nifs
, NETIF_STATS_BADLEN
);
1034 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1038 printf("ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb
->ipsec_ifp
->if_xname
);
1042 STATS_INC(nifs
, NETIF_STATS_TXPKTS
);
1043 STATS_INC(nifs
, NETIF_STATS_TXCOPY_MBUF
);
1045 tx_ring_stats
.kcrsi_slots_transferred
++;
1046 tx_ring_stats
.kcrsi_bytes_transferred
+= length
;
// Publish progress on the TX ring and release completed slots.
1050 kern_channel_advance_slot(tx_ring
, tx_pslot
);
1051 kern_channel_increment_ring_net_stats(tx_ring
, pcb
->ipsec_ifp
, &tx_ring_stats
);
1052 (void)kern_channel_reclaim(tx_ring
);
1055 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
/*
 * Netif TX doorbell callback: refills and syncs the TX ring, applies
 * back-pressure to the interface when the ring is full, and wakes the
 * kernel pipe reader.
 *
 * Visible flow:
 *   - kern_channel_tx_refill_canblock() is called on the ring; results
 *     other than 0, EAGAIN and EBUSY are logged;
 *   - the ring is entered with kr_enter() and the pcb lock taken shared;
 *   - with the kpipe enabled, if the number of available slots has reached
 *     ipsec_netif_txring_size - 1, interface output is disabled
 *     (ipsec_kpipe_sync_rx() re-enables it);
 *   - with the kpipe enabled, the pcb lock is dropped and the kpipe RX
 *     ring is notified; otherwise the lock is simply dropped.
 *
 * NOTE(review): the two lines starting with " * Refill and sync" are the
 * remains of a block comment whose opening and closing lines are missing
 * in this copy.  The declaration of `rc`, the kr_exit call and the return
 * are not visible here.
 */
1061 ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
1062 kern_channel_ring_t ring
, __unused
uint32_t flags
)
1064 #pragma unused(nxprov)
1065 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
1066 boolean_t more
= false;
1070 * Refill and sync the ring; we may be racing against another thread doing
1071 * an RX sync that also wants to do kr_enter(), and so use the blocking
1074 rc
= kern_channel_tx_refill_canblock(ring
, UINT32_MAX
, UINT32_MAX
, true, &more
);
// EAGAIN and EBUSY are deliberately not reported.
1075 if (rc
!= 0 && rc
!= EAGAIN
&& rc
!= EBUSY
) {
1076 printf("%s, tx refill failed %d\n", __func__
, rc
);
1079 (void) kr_enter(ring
, TRUE
);
1080 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
1082 if (pcb
->ipsec_kpipe_enabled
) {
1083 uint32_t tx_available
= kern_channel_available_slot_count(ring
);
// A ring size of 0 disables this back-pressure check.
1084 if (pcb
->ipsec_netif_txring_size
> 0 &&
1085 tx_available
>= pcb
->ipsec_netif_txring_size
- 1) {
1086 // No room left in tx ring, disable output for now
1087 errno_t error
= ifnet_disable_output(pcb
->ipsec_ifp
);
1089 printf("ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error
);
1094 if (pcb
->ipsec_kpipe_enabled
) {
1095 kern_channel_ring_t rx_ring
= pcb
->ipsec_kpipe_rxring
;
1097 // Unlock while calling notify
1098 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
1099 // Signal the kernel pipe ring to read
1100 if (rx_ring
!= NULL
) {
1101 kern_channel_notify(rx_ring
, 0);
1104 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
// ipsec_netif_sync_rx: RX sync callback of the ipsec netif nexus (it is the value
// installed in .nxpi_sync_rx by ipsec_nexus_ifattach).
//
// Fills slots of rx_ring from two sources, in this order:
//   1. mbufs queued on pcb->ipsec_input_chain. Each is copied into a packet
//      allocated from rx_ring->ckr_pp; packets that do not fit are first
//      fragmented and the fragments are put back on the input chain.
//   2. packets read from pcb->ipsec_kpipe_txring. Each is copied into an mbuf,
//      passed to esp4_input_extended()/esp6_input_extended(), and the mbuf that
//      comes back is copied into an rx packet.
//
// nxprov - unused (see #pragma unused below)
// nexus  - nexus whose context is the struct ipsec_pcb for this interface
// rx_ring - netif RX ring to fill
// flags  - unused (see #pragma unused below)
//
// NOTE(review): this listing omits some statements (braces, else/break/continue/
// return lines, a few conditions), so the exact branch structure and the return
// value are not visible here — confirm against the full source before relying on
// the comments about control flow.
1113 ipsec_netif_sync_rx(kern_nexus_provider_t nxprov
, kern_nexus_t nexus
,
1114 kern_channel_ring_t rx_ring
, uint32_t flags
)
1116 #pragma unused(nxprov)
1117 #pragma unused(flags)
// The nexus context is the per-interface protocol control block.
1118 struct ipsec_pcb
*pcb
= kern_nexus_get_context(nexus
);
1119 struct kern_channel_ring_stat_increment rx_ring_stats
;
1121 struct netif_stats
*nifs
= &NX_NETIF_PRIVATE(nexus
)->nif_stats
;
1123 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
1125 // Reclaim user-released slots
1126 (void) kern_channel_reclaim(rx_ring
);
1128 STATS_INC(nifs
, NETIF_STATS_RXSYNC
);
1130 uint32_t avail
= kern_channel_available_slot_count(rx_ring
);
// NOTE(review): the test that consumes 'avail' is not visible in this listing;
// this unlock presumably belongs to an early exit taken when no slot is
// available — confirm.
1132 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
// Packets for the RX ring are allocated from the ring's own buffer pool.
1136 struct kern_pbufpool
*rx_pp
= rx_ring
->ckr_pp
;
1137 VERIFY(rx_pp
!= NULL
);
1138 bzero(&rx_ring_stats
, sizeof(rx_ring_stats
));
1139 kern_channel_slot_t rx_pslot
= NULL
;
1140 kern_channel_slot_t rx_slot
= kern_channel_get_next_slot(rx_ring
, NULL
, NULL
);
// Source 1: drain mbufs queued on the pcb input chain, one per free RX slot.
1142 while (rx_slot
!= NULL
) {
1143 // Check for a waiting packet
1144 lck_mtx_lock(&pcb
->ipsec_input_chain_lock
);
1145 mbuf_t data
= pcb
->ipsec_input_chain
;
1147 lck_mtx_unlock(&pcb
->ipsec_input_chain_lock
);
1151 // Allocate rx packet
1152 kern_packet_t rx_ph
= 0;
1153 errno_t error
= kern_pbufpool_alloc_nosleep(rx_pp
, 1, &rx_ph
);
// Allocation failure is counted as both a no-memory event and a drop; the
// input chain lock taken above is released on this path.
1154 if (unlikely(error
!= 0)) {
1155 STATS_INC(nifs
, NETIF_STATS_NOMEM_PKT
);
1156 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1157 lck_mtx_unlock(&pcb
->ipsec_input_chain_lock
);
1161 // Advance waiting packets
// Unlink the head mbuf; when the chain becomes empty the tail pointer is reset too.
1162 pcb
->ipsec_input_chain
= data
->m_nextpkt
;
1163 data
->m_nextpkt
= NULL
;
1164 if (pcb
->ipsec_input_chain
== NULL
) {
1165 pcb
->ipsec_input_chain_last
= NULL
;
1167 lck_mtx_unlock(&pcb
->ipsec_input_chain_lock
);
1169 size_t length
= mbuf_pkthdr_len(data
);
// A packet shorter than an IPv4 header cannot be inspected below: release the
// rx packet, count the drop and log it.
1171 if (length
< sizeof(struct ip
)) {
1174 kern_pbufpool_free(rx_pp
, rx_ph
);
1175 STATS_INC(nifs
, NETIF_STATS_BADLEN
);
1176 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1177 printf("ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
1178 pcb
->ipsec_ifp
->if_xname
, length
, sizeof(struct ip
));
// The IP version field of the first header byte selects the handling below.
1183 struct ip
*ip
= mtod(data
, struct ip
*);
1184 u_int ip_version
= ip
->ip_v
;
1185 switch (ip_version
) {
1195 printf("ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
1196 pcb
->ipsec_ifp
->if_xname
, ip_version
);
// Fragmentation is needed when the packet does not fit in one buflet, or when
// an input fragment size is configured and the packet exceeds it.
1201 if (length
> rx_pp
->pp_buflet_size
||
1202 (pcb
->ipsec_frag_size_set
&& length
> pcb
->ipsec_input_frag_size
)) {
1204 // We need to fragment to send up into the netif
// fragment_mtu is the smaller of the buflet size and the configured input
// fragment size (when one is set).
1206 u_int32_t fragment_mtu
= rx_pp
->pp_buflet_size
;
1207 if (pcb
->ipsec_frag_size_set
&&
1208 pcb
->ipsec_input_frag_size
< rx_pp
->pp_buflet_size
) {
1209 fragment_mtu
= pcb
->ipsec_input_frag_size
;
1212 mbuf_t fragment_chain
= NULL
;
1215 // ip_fragment expects the length in host order
1216 ip
->ip_len
= ntohs(ip
->ip_len
);
1218 // ip_fragment will modify the original data, don't free
1219 int fragment_error
= ip_fragment(data
, pcb
->ipsec_ifp
, fragment_mtu
, TRUE
);
1220 if (fragment_error
== 0 && data
!= NULL
) {
1221 fragment_chain
= data
;
1223 STATS_INC(nifs
, NETIF_STATS_BADLEN
);
1224 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1225 printf("ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
1226 pcb
->ipsec_ifp
->if_xname
, length
, fragment_error
);
// IPv6 handling: the packet must at least hold a fixed IPv6 header before it
// can be handed to ip6_do_fragmentation().
1231 if (length
< sizeof(struct ip6_hdr
)) {
1233 STATS_INC(nifs
, NETIF_STATS_BADLEN
);
1234 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1235 printf("ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
1236 pcb
->ipsec_ifp
->if_xname
, length
, sizeof(struct ip6_hdr
));
1239 // ip6_do_fragmentation will free the original data on success only
1240 struct ip6_hdr
*ip6
= mtod(data
, struct ip6_hdr
*);
// No extension headers are described to the fragmentation routine: exthdrs is
// passed zero-filled.
1241 struct ip6_exthdrs exthdrs
;
1242 memset(&exthdrs
, 0, sizeof(exthdrs
));
1244 int fragment_error
= ip6_do_fragmentation(&data
, 0, pcb
->ipsec_ifp
, sizeof(struct ip6_hdr
),
1245 ip6
, &exthdrs
, fragment_mtu
, ip6
->ip6_nxt
);
1246 if (fragment_error
== 0 && data
!= NULL
) {
1247 fragment_chain
= data
;
1250 STATS_INC(nifs
, NETIF_STATS_BADLEN
);
1251 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1252 printf("ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
1253 pcb
->ipsec_ifp
->if_xname
, length
, fragment_error
);
1259 // Cannot fragment unknown families
1261 STATS_INC(nifs
, NETIF_STATS_BADLEN
);
1262 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1263 printf("ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
1264 pcb
->ipsec_ifp
->if_xname
, length
, rx_pp
->pp_buflet_size
);
// Fragments are queued on the pcb input chain rather than delivered directly;
// the walk below finds the last fragment so ipsec_input_chain_last stays valid.
// NOTE(review): the assignment to ipsec_input_chain presumably sits in the
// else branch of the non-empty test (the else line is not visible) — confirm.
1269 if (fragment_chain
!= NULL
) {
1270 // Add fragments to chain before continuing
1271 lck_mtx_lock(&pcb
->ipsec_input_chain_lock
);
1272 if (pcb
->ipsec_input_chain
!= NULL
) {
1273 pcb
->ipsec_input_chain_last
->m_nextpkt
= fragment_chain
;
1275 pcb
->ipsec_input_chain
= fragment_chain
;
1277 while (fragment_chain
->m_nextpkt
) {
1278 VERIFY(fragment_chain
!= fragment_chain
->m_nextpkt
);
1279 fragment_chain
= fragment_chain
->m_nextpkt
;
1281 pcb
->ipsec_input_chain_last
= fragment_chain
;
1282 lck_mtx_unlock(&pcb
->ipsec_input_chain_lock
);
1285 // Make sure to free unused rx packet
1286 kern_pbufpool_free(rx_pp
, rx_ph
);
// Non-fragmented path: the mbuf is delivered as-is, tagged with this interface
// as its receive interface.
1291 mbuf_pkthdr_setrcvif(data
, pcb
->ipsec_ifp
);
1293 // Fill out rx packet
1294 kern_buflet_t rx_buf
= kern_packet_get_next_buflet(rx_ph
, NULL
);
1295 VERIFY(rx_buf
!= NULL
);
1296 void *rx_baddr
= kern_buflet_get_object_address(rx_buf
);
1297 VERIFY(rx_baddr
!= NULL
);
1299 // Copy-in data from mbuf to buflet
1300 mbuf_copydata(data
, 0, length
, (void *)rx_baddr
);
1301 kern_packet_clear_flow_uuid(rx_ph
); // Zero flow id
1303 // Finalize and attach the packet
// NOTE(review): each call below stores its result in 'error'; no check of
// that value is visible in this listing — confirm how failures are handled.
1304 error
= kern_buflet_set_data_offset(rx_buf
, 0);
1306 error
= kern_buflet_set_data_length(rx_buf
, length
);
1308 error
= kern_packet_set_link_header_offset(rx_ph
, 0);
1310 error
= kern_packet_set_network_header_offset(rx_ph
, 0);
1312 error
= kern_packet_finalize(rx_ph
);
1314 error
= kern_channel_slot_attach_packet(rx_ring
, rx_slot
, rx_ph
);
1317 STATS_INC(nifs
, NETIF_STATS_RXPKTS
);
1318 STATS_INC(nifs
, NETIF_STATS_RXCOPY_MBUF
);
1319 bpf_tap_packet_in(pcb
->ipsec_ifp
, DLT_RAW
, rx_ph
, NULL
, 0);
1321 rx_ring_stats
.kcrsi_slots_transferred
++;
1322 rx_ring_stats
.kcrsi_bytes_transferred
+= length
;
// Interface input counters are only bumped here when external ifdata stats
// are not enabled on the pcb.
1324 if (!pcb
->ipsec_ext_ifdata_stats
) {
1325 ifnet_stat_increment_in(pcb
->ipsec_ifp
, 1, length
, 0);
1332 rx_slot
= kern_channel_get_next_slot(rx_ring
, rx_slot
, NULL
);
// Source 2: packets from the kernel pipe TX ring recorded in the pcb.
1335 struct kern_channel_ring_stat_increment tx_ring_stats
;
1336 bzero(&tx_ring_stats
, sizeof(tx_ring_stats
));
1337 kern_channel_ring_t tx_ring
= pcb
->ipsec_kpipe_txring
;
1338 kern_channel_slot_t tx_pslot
= NULL
;
1339 kern_channel_slot_t tx_slot
= NULL
;
1340 if (tx_ring
== NULL
) {
1341 // Net-If TX ring not set up yet, nothing to read
1346 // Unlock ipsec before entering ring
1347 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
1349 (void)kr_enter(tx_ring
, TRUE
);
1351 // Lock again after entering and validate
1352 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
// The pcb lock was dropped above, so the ring pointer is compared against the
// pcb again before it is used.
1354 if (tx_ring
!= pcb
->ipsec_kpipe_txring
) {
1358 tx_slot
= kern_channel_get_next_slot(tx_ring
, NULL
, NULL
);
1359 if (tx_slot
== NULL
) {
1360 // Nothing to read, don't bother signalling
// Process one kpipe TX packet per free RX slot until either ring runs out.
1364 while (rx_slot
!= NULL
&& tx_slot
!= NULL
) {
1370 // Allocate rx packet
1371 kern_packet_t rx_ph
= 0;
1372 error
= kern_pbufpool_alloc_nosleep(rx_pp
, 1, &rx_ph
);
1373 if (unlikely(error
!= 0)) {
1374 STATS_INC(nifs
, NETIF_STATS_NOMEM_PKT
);
1375 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1379 kern_packet_t tx_ph
= kern_channel_slot_get_packet(tx_ring
, tx_slot
);
1383 tx_slot
= kern_channel_get_next_slot(tx_ring
, tx_slot
, NULL
);
// Locate the start of the payload inside the TX buflet (object address plus
// data offset).
1389 kern_buflet_t tx_buf
= kern_packet_get_next_buflet(tx_ph
, NULL
);
1390 VERIFY(tx_buf
!= NULL
);
1391 uint8_t *tx_baddr
= kern_buflet_get_object_address(tx_buf
);
1392 VERIFY(tx_baddr
!= 0);
1393 tx_baddr
+= kern_buflet_get_data_offset(tx_buf
);
// The amount copied is clamped to the pcb slot size.
1395 length
= MIN(kern_packet_get_data_length(tx_ph
),
1396 pcb
->ipsec_slot_size
);
1398 // Increment TX stats
1399 tx_ring_stats
.kcrsi_slots_transferred
++;
1400 tx_ring_stats
.kcrsi_bytes_transferred
+= length
;
// Only payloads that can hold at least an IPv4 header are copied into a
// freshly allocated mbuf for ESP input processing.
1402 if (length
>= sizeof(struct ip
)) {
1403 error
= mbuf_gethdr(MBUF_DONTWAIT
, MBUF_TYPE_HEADER
, &data
);
1405 error
= mbuf_copyback(data
, 0, length
, tx_baddr
, MBUF_DONTWAIT
);
1407 struct ip
*ip
= mtod(data
, struct ip
*);
1408 u_int ip_version
= ip
->ip_v
;
1409 switch (ip_version
) {
// IPv4: ip_len is rewritten as host-order total length minus the IPv4 header
// size and ip_off is converted to host order before the packet is handed to
// esp4_input_extended() — presumably the form that routine expects; confirm.
1412 ip
->ip_len
= ntohs(ip
->ip_len
) - sizeof(struct ip
);
1413 ip
->ip_off
= ntohs(ip
->ip_off
);
1415 if (length
< ip
->ip_len
) {
1416 printf("ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
1417 pcb
->ipsec_ifp
->if_xname
, length
, ip
->ip_len
);
1418 STATS_INC(nifs
, NETIF_STATS_BADLEN
);
1419 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1423 data
= esp4_input_extended(data
, sizeof(struct ip
), pcb
->ipsec_ifp
);
// IPv6: first make sure the fixed header fits, then that the copied length
// covers header plus the payload length announced in ip6_plen.
1428 if (length
< sizeof(struct ip6_hdr
)) {
1429 printf("ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
1430 pcb
->ipsec_ifp
->if_xname
, length
);
1431 STATS_INC(nifs
, NETIF_STATS_BADLEN
);
1432 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1437 struct ip6_hdr
*ip6
= mtod(data
, struct ip6_hdr
*);
1438 const size_t ip6_len
= sizeof(*ip6
) + ntohs(ip6
->ip6_plen
);
1439 if (length
< ip6_len
) {
1440 printf("ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
1441 pcb
->ipsec_ifp
->if_xname
, length
, ip6_len
);
1442 STATS_INC(nifs
, NETIF_STATS_BADLEN
);
1443 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1447 int offset
= sizeof(struct ip6_hdr
);
1448 esp6_input_extended(&data
, &offset
, ip6
->ip6_nxt
, pcb
->ipsec_ifp
);
1454 printf("ipsec_netif_sync_rx %s: unknown ip version %u\n",
1455 pcb
->ipsec_ifp
->if_xname
, ip_version
);
1456 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
// Failure paths for the mbuf copy, the mbuf allocation and the minimum-length
// test above: each logs and counts a drop.
1463 printf("ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb
->ipsec_ifp
->if_xname
, length
, error
);
1464 STATS_INC(nifs
, NETIF_STATS_NOMEM_MBUF
);
1465 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1470 printf("ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb
->ipsec_ifp
->if_xname
, error
);
1471 STATS_INC(nifs
, NETIF_STATS_NOMEM_MBUF
);
1472 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1475 printf("ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb
->ipsec_ifp
->if_xname
, length
);
1476 STATS_INC(nifs
, NETIF_STATS_BADLEN
);
1477 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1481 // Failed to get decrypted data
1482 kern_pbufpool_free(rx_pp
, rx_ph
);
// From here on 'length' is the packet-header length of the mbuf produced by
// ESP input processing; it must fit in a single buflet to be delivered.
1486 length
= mbuf_pkthdr_len(data
);
1487 if (length
> rx_pp
->pp_buflet_size
) {
1490 kern_pbufpool_free(rx_pp
, rx_ph
);
1491 STATS_INC(nifs
, NETIF_STATS_BADLEN
);
1492 STATS_INC(nifs
, NETIF_STATS_DROPPED
);
1493 printf("ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
1494 pcb
->ipsec_ifp
->if_xname
, length
, rx_pp
->pp_buflet_size
);
1498 mbuf_pkthdr_setrcvif(data
, pcb
->ipsec_ifp
);
1500 // Fill out rx packet
1501 kern_buflet_t rx_buf
= kern_packet_get_next_buflet(rx_ph
, NULL
);
1502 VERIFY(rx_buf
!= NULL
);
1503 void *rx_baddr
= kern_buflet_get_object_address(rx_buf
);
1504 VERIFY(rx_baddr
!= NULL
);
1506 // Copy-in data from mbuf to buflet
1507 mbuf_copydata(data
, 0, length
, (void *)rx_baddr
);
1508 kern_packet_clear_flow_uuid(rx_ph
); // Zero flow id
1510 // Finalize and attach the packet
// NOTE(review): as in the first loop, the results stored in 'error' are not
// visibly checked in this listing — confirm.
1511 error
= kern_buflet_set_data_offset(rx_buf
, 0);
1513 error
= kern_buflet_set_data_length(rx_buf
, length
);
1515 error
= kern_packet_set_link_header_offset(rx_ph
, 0);
1517 error
= kern_packet_set_network_header_offset(rx_ph
, 0);
1519 error
= kern_packet_finalize(rx_ph
);
1521 error
= kern_channel_slot_attach_packet(rx_ring
, rx_slot
, rx_ph
);
1524 STATS_INC(nifs
, NETIF_STATS_RXPKTS
);
1525 STATS_INC(nifs
, NETIF_STATS_RXCOPY_DIRECT
);
1526 bpf_tap_packet_in(pcb
->ipsec_ifp
, DLT_RAW
, rx_ph
, NULL
, 0);
1528 rx_ring_stats
.kcrsi_slots_transferred
++;
1529 rx_ring_stats
.kcrsi_bytes_transferred
+= length
;
1531 if (!pcb
->ipsec_ext_ifdata_stats
) {
1532 ifnet_stat_increment_in(pcb
->ipsec_ifp
, 1, length
, 0);
1538 rx_slot
= kern_channel_get_next_slot(rx_ring
, rx_slot
, NULL
);
// Publish progress on both rings: advance to the last consumed slot and add
// the accumulated per-ring statistics.
// NOTE(review): the conditions guarding these calls (and where rx_pslot /
// tx_pslot are updated) are not visible in this listing — confirm.
1543 kern_channel_advance_slot(rx_ring
, rx_pslot
);
1544 kern_channel_increment_ring_net_stats(rx_ring
, pcb
->ipsec_ifp
, &rx_ring_stats
);
1548 kern_channel_advance_slot(tx_ring
, tx_pslot
);
1549 kern_channel_increment_ring_net_stats(tx_ring
, pcb
->ipsec_ifp
, &tx_ring_stats
);
1550 (void)kern_channel_reclaim(tx_ring
);
1553 // Unlock first, then exit ring
1554 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
// The kpipe TX ring is only notified when at least one of its slots was consumed.
1555 if (tx_ring
!= NULL
) {
1556 if (tx_pslot
!= NULL
) {
1557 kern_channel_notify(tx_ring
, 0);
// ipsec_nexus_ifattach: registers a netif nexus provider for this pcb and
// allocates a net provider instance for it.
//
// The provider is named "com.apple.netif.ipsec<unit>" and uses the ipsec netif
// callbacks listed in prov_init. Slot buffer size and ring sizes are taken from
// the pcb. The provider UUID is stored in pcb->ipsec_nx.if_provider and the
// instance UUID in pcb->ipsec_nx.if_instance.
//
// pcb         - protocol control block being attached
// init_params - ifnet init parameters, passed through as nxneti_eparams
//
// If allocating the instance fails, the provider is deregistered again and its
// UUID cleared.
// NOTE(review): the remaining parameter(s), the return statement and the error
// branches are not visible in this listing — confirm against the full source.
1566 ipsec_nexus_ifattach(struct ipsec_pcb
*pcb
,
1567 struct ifnet_init_eparams
*init_params
,
1571 nexus_controller_t controller
= kern_nexus_shared_controller();
1572 struct kern_nexus_net_init net_init
;
1574 nexus_name_t provider_name
;
1575 snprintf((char *)provider_name
, sizeof(provider_name
),
1576 "com.apple.netif.ipsec%d", pcb
->ipsec_unit
);
// Callback table for the netif provider; slot init/fini hooks are not used.
1578 struct kern_nexus_provider_init prov_init
= {
1579 .nxpi_version
= KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION
,
1580 .nxpi_flags
= NXPIF_VIRTUAL_DEVICE
,
1581 .nxpi_pre_connect
= ipsec_nexus_pre_connect
,
1582 .nxpi_connected
= ipsec_nexus_connected
,
1583 .nxpi_pre_disconnect
= ipsec_netif_pre_disconnect
,
1584 .nxpi_disconnected
= ipsec_nexus_disconnected
,
1585 .nxpi_ring_init
= ipsec_netif_ring_init
,
1586 .nxpi_ring_fini
= ipsec_netif_ring_fini
,
1587 .nxpi_slot_init
= NULL
,
1588 .nxpi_slot_fini
= NULL
,
1589 .nxpi_sync_tx
= ipsec_netif_sync_tx
,
1590 .nxpi_sync_rx
= ipsec_netif_sync_rx
,
1591 .nxpi_tx_doorbell
= ipsec_netif_tx_doorbell
,
1594 nexus_attr_t nxa
= NULL
;
1595 err
= kern_nexus_attr_create(&nxa
);
1596 IPSEC_IF_VERIFY(err
== 0);
1598 printf("%s: kern_nexus_attr_create failed: %d\n",
// The slot buffer size follows the per-pcb slot size.
1603 uint64_t slot_buffer_size
= pcb
->ipsec_slot_size
;
1604 err
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_SLOT_BUF_SIZE
, slot_buffer_size
);
1607 // Reset ring size for netif nexus to limit memory usage
1608 uint64_t ring_size
= pcb
->ipsec_netif_ring_size
;
1609 err
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_TX_SLOTS
, ring_size
);
1611 err
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_RX_SLOTS
, ring_size
);
// Record the TX ring size that was requested for the netif nexus.
1614 pcb
->ipsec_netif_txring_size
= ring_size
;
1616 err
= kern_nexus_controller_register_provider(controller
,
1622 &pcb
->ipsec_nx
.if_provider
);
1623 IPSEC_IF_VERIFY(err
== 0);
1625 printf("%s register provider failed, error %d\n",
// Instance parameters: the caller's ifnet init params, no link-layer address,
// and ipsec_netif_prepare as the prepare callback.
1630 bzero(&net_init
, sizeof(net_init
));
1631 net_init
.nxneti_version
= KERN_NEXUS_NET_CURRENT_VERSION
;
1632 net_init
.nxneti_flags
= 0;
1633 net_init
.nxneti_eparams
= init_params
;
1634 net_init
.nxneti_lladdr
= NULL
;
1635 net_init
.nxneti_prepare
= ipsec_netif_prepare
;
1636 err
= kern_nexus_controller_alloc_net_provider_instance(controller
,
1637 pcb
->ipsec_nx
.if_provider
,
1639 &pcb
->ipsec_nx
.if_instance
,
1642 IPSEC_IF_VERIFY(err
== 0);
1644 printf("%s alloc_net_provider_instance failed, %d\n",
// Undo the provider registration when no instance could be allocated.
1646 kern_nexus_controller_deregister_provider(controller
,
1647 pcb
->ipsec_nx
.if_provider
);
1648 uuid_clear(pcb
->ipsec_nx
.if_provider
);
// The attribute object is only needed while registering; release it.
1654 kern_nexus_attr_destroy(nxa
);
// ipsec_detach_provider_and_instance: releases one nexus provider/instance pair.
//
// provider - UUID of the registered provider; deregistered when non-null
// instance - UUID of the provider instance; freed when non-null
//
// The instance is handled before the provider. Each UUID is cleared after its
// release call, and failures are logged with printf.
// NOTE(review): the arguments of the two controller calls and the error tests
// are not visible in this listing — confirm.
1660 ipsec_detach_provider_and_instance(uuid_t provider
, uuid_t instance
)
1662 nexus_controller_t controller
= kern_nexus_shared_controller();
1665 if (!uuid_is_null(instance
)) {
1666 err
= kern_nexus_controller_free_provider_instance(controller
,
1669 printf("%s free_provider_instance failed %d\n",
1672 uuid_clear(instance
);
1674 if (!uuid_is_null(provider
)) {
1675 err
= kern_nexus_controller_deregister_provider(controller
,
1678 printf("%s deregister_provider %d\n", __func__
, err
);
1680 uuid_clear(provider
);
// ipsec_nexus_detach: tears down all nexus state recorded in nx.
//
// Order of operations visible below: detach the multistack host attachment
// (nx->ms_host) and device attachment (nx->ms_device) when set, release the
// netif provider pair (starting from nx->if_provider) and the multistack
// provider pair (starting from nx->ms_provider) through
// ipsec_detach_provider_and_instance(), then zero the whole structure.
//
// nx - nexus bookkeeping structure to tear down; zero-filled on return
// NOTE(review): the trailing arguments of the detach calls are not visible in
// this listing — confirm.
1686 ipsec_nexus_detach(ipsec_nx_t nx
)
1688 nexus_controller_t controller
= kern_nexus_shared_controller();
1691 if (!uuid_is_null(nx
->ms_host
)) {
1692 err
= kern_nexus_ifdetach(controller
,
1696 printf("%s: kern_nexus_ifdetach ms host failed %d\n",
1701 if (!uuid_is_null(nx
->ms_device
)) {
1702 err
= kern_nexus_ifdetach(controller
,
1706 printf("%s: kern_nexus_ifdetach ms device failed %d\n",
1711 ipsec_detach_provider_and_instance(nx
->if_provider
,
1713 ipsec_detach_provider_and_instance(nx
->ms_provider
,
// Leave no stale UUIDs behind.
1716 memset(nx
, 0, sizeof(*nx
));
// ipsec_create_fs_provider_and_instance: registers a flowswitch nexus provider
// and allocates one instance of it.
//
// pcb       - supplies the slot size and the flowswitch TX/RX ring sizes
// subtype   - value stored in the NEXUS_ATTR_EXTENSIONS attribute
// type_name - used in the provider name ("com.apple.<type_name>.<ifname>") and in logs
// provider  - UUID of the provider (cleared again if instance allocation fails)
// instance  - UUID of the allocated instance
//
// The attribute object is destroyed once the provider has been registered. If
// the instance cannot be allocated, the provider is deregistered.
// NOTE(review): one parameter (presumably 'ifname', used in the provider name),
// the return statement and the error branches are not visible in this listing —
// confirm.
1720 ipsec_create_fs_provider_and_instance(struct ipsec_pcb
*pcb
,
1721 uint32_t subtype
, const char *type_name
,
1723 uuid_t
*provider
, uuid_t
*instance
)
1725 nexus_attr_t attr
= NULL
;
1726 nexus_controller_t controller
= kern_nexus_shared_controller();
1729 struct kern_nexus_init init
;
1730 nexus_name_t provider_name
;
// Look up the builtin flowswitch domain provider.
1732 err
= kern_nexus_get_builtin_domain_provider(NEXUS_TYPE_FLOW_SWITCH
,
1734 IPSEC_IF_VERIFY(err
== 0);
1736 printf("%s can't get %s provider, error %d\n",
1737 __func__
, type_name
, err
);
1741 err
= kern_nexus_attr_create(&attr
);
1742 IPSEC_IF_VERIFY(err
== 0);
1744 printf("%s: kern_nexus_attr_create failed: %d\n",
1749 err
= kern_nexus_attr_set(attr
, NEXUS_ATTR_EXTENSIONS
, subtype
);
1752 uint64_t slot_buffer_size
= pcb
->ipsec_slot_size
;
1753 err
= kern_nexus_attr_set(attr
, NEXUS_ATTR_SLOT_BUF_SIZE
, slot_buffer_size
);
1756 // Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
1757 uint64_t tx_ring_size
= pcb
->ipsec_tx_fsw_ring_size
;
1758 err
= kern_nexus_attr_set(attr
, NEXUS_ATTR_TX_SLOTS
, tx_ring_size
);
1760 uint64_t rx_ring_size
= pcb
->ipsec_rx_fsw_ring_size
;
1761 err
= kern_nexus_attr_set(attr
, NEXUS_ATTR_RX_SLOTS
, rx_ring_size
);
1764 snprintf((char *)provider_name
, sizeof(provider_name
),
1765 "com.apple.%s.%s", type_name
, ifname
);
1766 err
= kern_nexus_controller_register_provider(controller
,
// The attributes are no longer needed after registration.
1773 kern_nexus_attr_destroy(attr
);
1775 IPSEC_IF_VERIFY(err
== 0);
1777 printf("%s register %s provider failed, error %d\n",
1778 __func__
, type_name
, err
);
1781 bzero(&init
, sizeof (init
));
1782 init
.nxi_version
= KERN_NEXUS_CURRENT_VERSION
;
1783 err
= kern_nexus_controller_alloc_provider_instance(controller
,
1787 IPSEC_IF_VERIFY(err
== 0);
1789 printf("%s alloc_provider_instance %s failed, %d\n",
1790 __func__
, type_name
, err
);
// Roll back the provider registration on instance allocation failure.
1791 kern_nexus_controller_deregister_provider(controller
,
1793 uuid_clear(*provider
);
// ipsec_multistack_attach: creates the multistack flowswitch for this pcb and
// attaches it to the netif instance.
//
// Steps visible below:
//   1. create the flowswitch provider/instance (multistack subtype);
//   2. attach the flowswitch instance to the netif instance twice, recording
//      the attachments in nx->ms_device and nx->ms_host;
//   3. look up the flowswitch nexus and copy its agent UUID into nx->ms_agent.
// The final statements call ipsec_nexus_detach() and ifnet_detach(), and panic
// if the interface cannot be detached.
//
// pcb - protocol control block whose ipsec_nx is populated
// NOTE(review): the conditions that lead to the teardown at the end, and the
// return value, are not visible in this listing — presumably this is the shared
// failure path; confirm.
1800 ipsec_multistack_attach(struct ipsec_pcb
*pcb
)
1802 nexus_controller_t controller
= kern_nexus_shared_controller();
1804 ipsec_nx_t nx
= &pcb
->ipsec_nx
;
1806 // Allocate multistack flowswitch
1807 err
= ipsec_create_fs_provider_and_instance(pcb
,
1808 NEXUS_EXTENSION_FSW_TYPE_MULTISTACK
,
1810 pcb
->ipsec_ifp
->if_xname
,
1814 printf("%s: failed to create bridge provider and instance\n",
1819 // Attach multistack to device port
1820 err
= kern_nexus_ifattach(controller
, nx
->ms_instance
,
1821 NULL
, nx
->if_instance
,
1822 FALSE
, &nx
->ms_device
);
1824 printf("%s kern_nexus_ifattach ms device %d\n", __func__
, err
);
1828 // Attach multistack to host port
1829 err
= kern_nexus_ifattach(controller
, nx
->ms_instance
,
1830 NULL
, nx
->if_instance
,
1831 TRUE
, &nx
->ms_host
);
1833 printf("%s kern_nexus_ifattach ms host %d\n", __func__
, err
);
1837 // Extract the agent UUID and save for later
// nx_find() returns a nexus that is released with nx_release() below; the
// flowswitch is read-locked while its multistack context is inspected.
1838 struct kern_nexus
*multistack_nx
= nx_find(nx
->ms_instance
, false);
1839 if (multistack_nx
!= NULL
) {
1840 struct nx_flowswitch
*flowswitch
= NX_FSW_PRIVATE(multistack_nx
);
1841 if (flowswitch
!= NULL
) {
1842 FSW_RLOCK(flowswitch
);
1843 struct fsw_ms_context
*ms_context
= (struct fsw_ms_context
*)flowswitch
->fsw_ops_private
;
1844 if (ms_context
!= NULL
) {
1845 uuid_copy(nx
->ms_agent
, ms_context
->mc_agent_uuid
);
1847 printf("ipsec_multistack_attach - fsw_ms_context is NULL\n");
1849 FSW_UNLOCK(flowswitch
);
1851 printf("ipsec_multistack_attach - flowswitch is NULL\n");
1853 nx_release(multistack_nx
);
1855 printf("ipsec_multistack_attach - unable to find multistack nexus\n");
// Teardown: release all nexus state, then detach the interface. A failing
// ifnet_detach() is treated as fatal.
1861 ipsec_nexus_detach(nx
);
1863 errno_t detach_error
= 0;
1864 if ((detach_error
= ifnet_detach(pcb
->ipsec_ifp
)) != 0) {
1865 panic("ipsec_multistack_attach - ifnet_detach failed: %d\n", detach_error
);
1872 #pragma mark Kernel Pipe Nexus
// ipsec_register_kernel_pipe_nexus: reference-counted registration of the
// kernel pipe nexus provider "com.apple.nexus.ipsec.kpipe".
//
// Under ipsec_lock the reference count ipsec_ncd_refcount is incremented; when
// it was already non-zero the lock is released right away. Otherwise a nexus
// controller (ipsec_ncd) is created, the builtin kernel pipe domain provider is
// looked up and the provider is registered with the kpipe callbacks, using the
// default slot size and the if_ipsec_ring_size ring size.
//
// The last visible statements destroy the controller and reset the reference
// count to 0 before unlocking.
// NOTE(review): the conditions guarding that cleanup (presumably "on failure
// only"), the return value and the remaining register_provider arguments are
// not visible in this listing — confirm.
1875 ipsec_register_kernel_pipe_nexus(void)
1877 nexus_attr_t nxa
= NULL
;
1880 lck_mtx_lock(&ipsec_lock
);
// Post-increment: the body runs when the count was already non-zero, i.e. the
// provider has been registered by an earlier caller.
1881 if (ipsec_ncd_refcount
++) {
1882 lck_mtx_unlock(&ipsec_lock
);
1886 result
= kern_nexus_controller_create(&ipsec_ncd
);
1888 printf("%s: kern_nexus_controller_create failed: %d\n",
1889 __FUNCTION__
, result
);
1894 result
= kern_nexus_get_builtin_domain_provider(
1895 NEXUS_TYPE_KERNEL_PIPE
, &dom_prov
);
1897 printf("%s: kern_nexus_get_builtin_domain_provider failed: %d\n",
1898 __FUNCTION__
, result
);
// Callback table for the kernel pipe provider; no slot hooks and no TX doorbell.
1902 struct kern_nexus_provider_init prov_init
= {
1903 .nxpi_version
= KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION
,
1904 .nxpi_flags
= NXPIF_VIRTUAL_DEVICE
,
1905 .nxpi_pre_connect
= ipsec_nexus_pre_connect
,
1906 .nxpi_connected
= ipsec_nexus_connected
,
1907 .nxpi_pre_disconnect
= ipsec_nexus_pre_disconnect
,
1908 .nxpi_disconnected
= ipsec_nexus_disconnected
,
1909 .nxpi_ring_init
= ipsec_kpipe_ring_init
,
1910 .nxpi_ring_fini
= ipsec_kpipe_ring_fini
,
1911 .nxpi_slot_init
= NULL
,
1912 .nxpi_slot_fini
= NULL
,
1913 .nxpi_sync_tx
= ipsec_kpipe_sync_tx
,
1914 .nxpi_sync_rx
= ipsec_kpipe_sync_rx
,
1915 .nxpi_tx_doorbell
= NULL
,
1918 result
= kern_nexus_attr_create(&nxa
);
1920 printf("%s: kern_nexus_attr_create failed: %d\n",
1921 __FUNCTION__
, result
);
// Attribute failures are asserted with VERIFY rather than handled.
1925 uint64_t slot_buffer_size
= IPSEC_IF_DEFAULT_SLOT_SIZE
;
1926 result
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_SLOT_BUF_SIZE
, slot_buffer_size
);
1927 VERIFY(result
== 0);
1929 // Reset ring size for kernel pipe nexus to limit memory usage
1930 uint64_t ring_size
= if_ipsec_ring_size
;
1931 result
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_TX_SLOTS
, ring_size
);
1932 VERIFY(result
== 0);
1933 result
= kern_nexus_attr_set(nxa
, NEXUS_ATTR_RX_SLOTS
, ring_size
);
1934 VERIFY(result
== 0);
1936 result
= kern_nexus_controller_register_provider(ipsec_ncd
,
1938 (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
1944 printf("%s: kern_nexus_controller_register_provider failed: %d\n",
1945 __FUNCTION__
, result
);
1951 kern_nexus_attr_destroy(nxa
);
1956 kern_nexus_controller_destroy(ipsec_ncd
);
1959 ipsec_ncd_refcount
= 0;
1962 lck_mtx_unlock(&ipsec_lock
);
// ipsec_unregister_kernel_pipe_nexus: drops one reference on the kernel pipe
// nexus controller. Under ipsec_lock, asserts that a reference is held,
// decrements ipsec_ncd_refcount and destroys the controller ipsec_ncd when the
// count reaches zero.
// NOTE(review): whether ipsec_ncd is reset after being destroyed is not visible
// in this listing — confirm.
1968 ipsec_unregister_kernel_pipe_nexus(void)
1970 lck_mtx_lock(&ipsec_lock
);
1972 VERIFY(ipsec_ncd_refcount
> 0);
1974 if (--ipsec_ncd_refcount
== 0) {
1975 kern_nexus_controller_destroy(ipsec_ncd
);
1979 lck_mtx_unlock(&ipsec_lock
);
1982 // For use by socket option, not internally
// ipsec_disable_channel: turns off the kernel pipe channel of a pcb.
//
// Under the exclusive pcb lock the current enabled flag and kpipe UUID are
// copied to locals, then both are cleared in the pcb. After the lock is
// released the provider instance identified by the saved UUID is freed and the
// kernel pipe nexus registration is dropped.
//
// pcb - protocol control block whose channel is disabled
// NOTE(review): the conditions around the free/unregister calls and the return
// value are not visible in this listing — confirm.
1984 ipsec_disable_channel(struct ipsec_pcb
*pcb
)
1990 lck_rw_lock_exclusive(&pcb
->ipsec_pcb_lock
);
1992 enabled
= pcb
->ipsec_kpipe_enabled
;
1993 uuid_copy(uuid
, pcb
->ipsec_kpipe_uuid
);
// Invariant: the UUID is null exactly when the channel is not enabled.
1995 VERIFY(uuid_is_null(pcb
->ipsec_kpipe_uuid
) == !enabled
);
1997 pcb
->ipsec_kpipe_enabled
= 0;
1998 uuid_clear(pcb
->ipsec_kpipe_uuid
);
2000 lck_rw_unlock_exclusive(&pcb
->ipsec_pcb_lock
);
// The instance is freed using the saved copy, outside the pcb lock.
2003 result
= kern_nexus_controller_free_provider_instance(ipsec_ncd
, uuid
);
2009 ipsec_unregister_kernel_pipe_nexus();
// ipsec_enable_channel: creates the kernel pipe channel for a pcb and binds it
// to a process.
//
// Registers (or takes a reference on) the kernel pipe nexus provider, then,
// under the exclusive pcb lock, allocates a provider instance with the pcb as
// its context and binds the NEXUS_PORT_KERNEL_PIPE_CLIENT port of that instance
// to the pid of 'proc'. On success ipsec_kpipe_enabled is set to 1.
//
// pcb  - protocol control block that receives the kpipe UUID
// proc - process whose pid the client port is bound to
//
// result is set to EEXIST when the channel is already enabled. If binding
// fails, the just-allocated instance is freed and the UUID cleared.
// NOTE(review): the error tests, the condition guarding the final unregister
// call (presumably "on failure") and the return are not visible in this
// listing — confirm.
2016 ipsec_enable_channel(struct ipsec_pcb
*pcb
, struct proc
*proc
)
2018 struct kern_nexus_init init
;
2021 result
= ipsec_register_kernel_pipe_nexus();
2028 lck_rw_lock_exclusive(&pcb
->ipsec_pcb_lock
);
2030 if (pcb
->ipsec_kpipe_enabled
) {
2031 result
= EEXIST
; // return success instead?
// A disabled channel must not have a UUID left over.
2035 VERIFY(uuid_is_null(pcb
->ipsec_kpipe_uuid
));
2036 bzero(&init
, sizeof (init
));
2037 init
.nxi_version
= KERN_NEXUS_CURRENT_VERSION
;
2038 result
= kern_nexus_controller_alloc_provider_instance(ipsec_ncd
,
2039 ipsec_kpipe_uuid
, pcb
, &pcb
->ipsec_kpipe_uuid
, &init
);
2044 nexus_port_t port
= NEXUS_PORT_KERNEL_PIPE_CLIENT
;
2045 result
= kern_nexus_controller_bind_provider_instance(ipsec_ncd
,
2046 pcb
->ipsec_kpipe_uuid
, &port
,
2047 proc_pid(proc
), NULL
, NULL
, 0, NEXUS_BIND_PID
);
// Roll back the instance allocation when the bind did not succeed.
2049 kern_nexus_controller_free_provider_instance(ipsec_ncd
,
2050 pcb
->ipsec_kpipe_uuid
);
2051 uuid_clear(pcb
->ipsec_kpipe_uuid
);
2055 pcb
->ipsec_kpipe_enabled
= 1;
2058 lck_rw_unlock_exclusive(&pcb
->ipsec_pcb_lock
);
2061 ipsec_unregister_kernel_pipe_nexus();
2067 #endif // IPSEC_NEXUS
2070 /* Kernel control functions */
// ipsec_free_pcb: releases a protocol control block.
//
// Frees any mbufs still queued on the input chain, destroys the input chain
// mutex and the pcb rw-lock, removes the pcb from the global ipsec_head list
// under ipsec_lock and returns the memory to ipsec_pcb_zone.
//
// pcb     - control block to release; must not be used afterwards
// in_list - NOTE(review): its use is not visible in this listing; presumably it
//           gates the TAILQ_REMOVE for pcbs that were never inserted — confirm.
2073 ipsec_free_pcb(struct ipsec_pcb
*pcb
, bool in_list
)
2076 mbuf_freem_list(pcb
->ipsec_input_chain
);
2077 lck_mtx_destroy(&pcb
->ipsec_input_chain_lock
, ipsec_lck_grp
);
2078 #endif // IPSEC_NEXUS
2079 lck_rw_destroy(&pcb
->ipsec_pcb_lock
, ipsec_lck_grp
);
// The global list is protected by ipsec_lock.
2081 lck_mtx_lock(&ipsec_lock
);
2082 TAILQ_REMOVE(&ipsec_head
, pcb
, ipsec_chain
);
2083 lck_mtx_unlock(&ipsec_lock
);
2085 zfree(ipsec_pcb_zone
, pcb
);
// ipsec_ctl_bind: allocates and initializes the protocol control block for a
// kernel control socket.
//
// The pcb comes from ipsec_pcb_zone and is zero-filled. It records the control
// reference and the unit from 'sac', defaults the output service class to
// MBUF_SC_OAM, sets the nexus defaults (netif disabled, default slot and ring
// sizes) and initializes the pcb rw-lock and the input chain mutex.
//
// kctlref - kernel control reference stored in the pcb
// sac     - control address; sc_unit becomes the pcb unit
// NOTE(review): the remaining parameter(s), how the pcb is handed back to the
// caller, and any check of the zalloc() result are not visible in this
// listing — confirm.
2089 ipsec_ctl_bind(kern_ctl_ref kctlref
,
2090 struct sockaddr_ctl
*sac
,
2093 struct ipsec_pcb
*pcb
= zalloc(ipsec_pcb_zone
);
2094 memset(pcb
, 0, sizeof(*pcb
));
2096 /* Setup the protocol control block */
2098 pcb
->ipsec_ctlref
= kctlref
;
2099 pcb
->ipsec_unit
= sac
->sc_unit
;
2100 pcb
->ipsec_output_service_class
= MBUF_SC_OAM
;
// Nexus-related defaults.
2103 pcb
->ipsec_use_netif
= false;
2104 pcb
->ipsec_slot_size
= IPSEC_IF_DEFAULT_SLOT_SIZE
;
2105 pcb
->ipsec_netif_ring_size
= IPSEC_IF_DEFAULT_RING_SIZE
;
2106 pcb
->ipsec_tx_fsw_ring_size
= IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE
;
2107 pcb
->ipsec_rx_fsw_ring_size
= IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE
;
2108 #endif // IPSEC_NEXUS
2110 lck_rw_init(&pcb
->ipsec_pcb_lock
, ipsec_lck_grp
, ipsec_lck_attr
);
2112 lck_mtx_init(&pcb
->ipsec_input_chain_lock
, ipsec_lck_grp
, ipsec_lck_attr
);
2113 #endif // IPSEC_NEXUS
// ipsec_ctl_connect: kernel control connect handler; creates the ipsec network
// interface for a control socket.
//
// Steps visible below:
//   1. if no pcb exists yet (*unitinfo == NULL), create one via ipsec_ctl_bind();
//   2. under ipsec_lock, choose a unique id and insert the pcb into ipsec_head;
//   3. build the interface name "ipsec<unit-1>" and unique name "ipsecid<id-1>";
//   4. fill in the ifnet init parameters;
//   5. create the interface either through the nexus path
//      (ipsec_nexus_ifattach + ipsec_multistack_attach, bpf attached as DLT_RAW)
//      or through ifnet_allocate_extended + ifnet_attach (bpf attached as DLT_NULL);
//   6. mark the interface IFF_RUNNING.
//
// kctlref - kernel control reference, forwarded to ipsec_ctl_bind()
// sac     - control address, forwarded to ipsec_ctl_bind()
//
// On the visible failure paths the pcb is released with ipsec_free_pcb(pcb, true).
// NOTE(review): the 'unitinfo' parameter declaration, the branch structure
// between the two attach paths and the return statements are not visible in
// this listing — confirm.
2119 ipsec_ctl_connect(kern_ctl_ref kctlref
,
2120 struct sockaddr_ctl
*sac
,
2123 struct ifnet_init_eparams ipsec_init
= {};
// NOTE(review): the result of ipsec_ctl_bind() is discarded here and *unitinfo
// is used right after — confirm it cannot fail in this situation.
2126 if (*unitinfo
== NULL
) {
2127 (void)ipsec_ctl_bind(kctlref
, sac
, unitinfo
);
2130 struct ipsec_pcb
*pcb
= *unitinfo
;
2132 lck_mtx_lock(&ipsec_lock
);
2134 /* Find some open interface id */
2135 u_int32_t chosen_unique_id
= 1;
2136 struct ipsec_pcb
*next_pcb
= TAILQ_LAST(&ipsec_head
, ipsec_list
);
2137 if (next_pcb
!= NULL
) {
2138 /* List was not empty, add one to the last item */
2139 chosen_unique_id
= next_pcb
->ipsec_unique_id
+ 1;
2143 * If this wrapped the id number, start looking at
2144 * the front of the list for an unused id.
// Wrap-around handling: walk the list from the front; the first entry whose id
// is larger than the candidate marks a gap the candidate can use.
2146 if (chosen_unique_id
== 0) {
2147 /* Find the next unused ID */
2148 chosen_unique_id
= 1;
2149 TAILQ_FOREACH(next_pcb
, &ipsec_head
, ipsec_chain
) {
2150 if (next_pcb
->ipsec_unique_id
> chosen_unique_id
) {
2151 /* We found a gap */
2155 chosen_unique_id
= next_pcb
->ipsec_unique_id
+ 1;
2160 pcb
->ipsec_unique_id
= chosen_unique_id
;
// Insert before the entry found above when there is one, otherwise append.
// NOTE(review): presumably this keeps the list sorted by unique id, which the
// gap search above appears to rely on — confirm.
2162 if (next_pcb
!= NULL
) {
2163 TAILQ_INSERT_BEFORE(next_pcb
, pcb
, ipsec_chain
);
2165 TAILQ_INSERT_TAIL(&ipsec_head
, pcb
, ipsec_chain
);
2167 lck_mtx_unlock(&ipsec_lock
);
// Both names are derived from one-based values, so 1 is subtracted.
2169 snprintf(pcb
->ipsec_if_xname
, sizeof(pcb
->ipsec_if_xname
), "ipsec%d", pcb
->ipsec_unit
- 1);
2170 snprintf(pcb
->ipsec_unique_name
, sizeof(pcb
->ipsec_unique_name
), "ipsecid%d", pcb
->ipsec_unique_id
- 1);
2171 printf("ipsec_ctl_connect: creating interface %s (id %s)\n", pcb
->ipsec_if_xname
, pcb
->ipsec_unique_name
);
2173 /* Create the interface */
2174 bzero(&ipsec_init
, sizeof(ipsec_init
));
2175 ipsec_init
.ver
= IFNET_INIT_CURRENT_VERSION
;
2176 ipsec_init
.len
= sizeof (ipsec_init
);
// The netif path marks the interface as Skywalk-native; the other assignment
// sets IFNET_INIT_NX_NOAUTO only and installs ipsec_start as the start callback.
2179 if (pcb
->ipsec_use_netif
) {
2180 ipsec_init
.flags
= (IFNET_INIT_SKYWALK_NATIVE
| IFNET_INIT_NX_NOAUTO
);
2182 #endif // IPSEC_NEXUS
2184 ipsec_init
.flags
= IFNET_INIT_NX_NOAUTO
;
2185 ipsec_init
.start
= ipsec_start
;
2187 ipsec_init
.name
= "ipsec";
2188 ipsec_init
.unit
= pcb
->ipsec_unit
- 1;
2189 ipsec_init
.uniqueid
= pcb
->ipsec_unique_name
;
2190 ipsec_init
.uniqueid_len
= strlen(pcb
->ipsec_unique_name
);
2191 ipsec_init
.family
= ipsec_family
;
2192 ipsec_init
.subfamily
= IFNET_SUBFAMILY_IPSEC
;
2193 ipsec_init
.type
= IFT_OTHER
;
2194 ipsec_init
.demux
= ipsec_demux
;
2195 ipsec_init
.add_proto
= ipsec_add_proto
;
2196 ipsec_init
.del_proto
= ipsec_del_proto
;
2197 ipsec_init
.softc
= pcb
;
2198 ipsec_init
.ioctl
= ipsec_ioctl
;
2199 ipsec_init
.detach
= ipsec_detached
;
// Nexus path: the netif nexus creates the ifnet, then the multistack
// flowswitch is attached on top of it.
2202 if (pcb
->ipsec_use_netif
) {
2203 result
= ipsec_nexus_ifattach(pcb
, &ipsec_init
, &pcb
->ipsec_ifp
);
2205 printf("ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result
);
2206 ipsec_free_pcb(pcb
, true);
2211 result
= ipsec_multistack_attach(pcb
);
2213 printf("ipsec_ctl_connect - ipsec_multistack_attach failed: %d\n", result
);
2219 bpfattach(pcb
->ipsec_ifp
, DLT_RAW
, 0);
2221 #endif // IPSEC_NEXUS
// Non-nexus path: allocate and attach the ifnet directly.
2223 result
= ifnet_allocate_extended(&ipsec_init
, &pcb
->ipsec_ifp
);
2225 printf("ipsec_ctl_connect - ifnet_allocate failed: %d\n", result
);
2226 ipsec_free_pcb(pcb
, true);
2230 ipsec_ifnet_set_attrs(pcb
->ipsec_ifp
);
2232 /* Attach the interface */
2233 result
= ifnet_attach(pcb
->ipsec_ifp
, NULL
);
2235 printf("ipsec_ctl_connect - ifnet_attach failed: %d\n", result
);
2236 ifnet_release(pcb
->ipsec_ifp
);
2237 ipsec_free_pcb(pcb
, true);
2243 bpfattach(pcb
->ipsec_ifp
, DLT_NULL
, 0);
2246 /* The interface's resources are allocated, mark it as running */
2247 ifnet_set_flags(pcb
->ipsec_ifp
, IFF_RUNNING
, IFF_RUNNING
);
// ipsec_detach_ip: asks the stack to detach a protocol family from an interface.
//
// For PF_INET an ifreq named "<name><unit>" is sent with SIOCPROTODETACH; for
// PF_INET6 an in6_ifreq is sent with SIOCPROTODETACH_IN6. Both ioctls are
// issued on pf_socket.
//
// interface - interface to detach the protocol from
// protocol  - PF_INET or PF_INET6
//
// result starts as EPROTONOSUPPORT and is replaced by the ioctl result for the
// two supported families.
// NOTE(review): the 'pf_socket' parameter declaration, the 'ifr' declaration
// and the return statement are not visible in this listing — confirm.
2253 ipsec_detach_ip(ifnet_t interface
,
2254 protocol_family_t protocol
,
2257 errno_t result
= EPROTONOSUPPORT
;
2259 /* Attempt a detach */
2260 if (protocol
== PF_INET
) {
2263 bzero(&ifr
, sizeof(ifr
));
2264 snprintf(ifr
.ifr_name
, sizeof(ifr
.ifr_name
), "%s%d",
2265 ifnet_name(interface
), ifnet_unit(interface
));
2267 result
= sock_ioctl(pf_socket
, SIOCPROTODETACH
, &ifr
);
2269 else if (protocol
== PF_INET6
) {
2270 struct in6_ifreq ifr6
;
2272 bzero(&ifr6
, sizeof(ifr6
));
2273 snprintf(ifr6
.ifr_name
, sizeof(ifr6
.ifr_name
), "%s%d",
2274 ifnet_name(interface
), ifnet_unit(interface
));
2276 result
= sock_ioctl(pf_socket
, SIOCPROTODETACH_IN6
, &ifr6
);
// ipsec_remove_address: removes one address from an interface.
//
// The address is copied into an ifreq (PF_INET) or in6_ifreq (PF_INET6) named
// "<name><unit>" and removed with SIOCDIFADDR or SIOCDIFADDR_IN6 on pf_socket.
// Failures of ifaddr_address() or of the ioctl are logged with printf.
//
// interface - interface that owns the address
// protocol  - PF_INET or PF_INET6
//
// NOTE(review): the 'address' and 'pf_socket' parameter declarations, the error
// tests and whether a value is returned are not visible in this listing. The
// log format strings below carry no trailing newline, unlike most others in
// this file — confirm whether that is intended.
2283 ipsec_remove_address(ifnet_t interface
,
2284 protocol_family_t protocol
,
2290 /* Attempt a detach */
2291 if (protocol
== PF_INET
) {
2294 bzero(&ifr
, sizeof(ifr
));
2295 snprintf(ifr
.ifr_name
, sizeof(ifr
.ifr_name
), "%s%d",
2296 ifnet_name(interface
), ifnet_unit(interface
));
// Copy the address to remove into the request.
2297 result
= ifaddr_address(address
, &ifr
.ifr_addr
, sizeof(ifr
.ifr_addr
));
2299 printf("ipsec_remove_address - ifaddr_address failed: %d", result
);
2302 result
= sock_ioctl(pf_socket
, SIOCDIFADDR
, &ifr
);
2304 printf("ipsec_remove_address - SIOCDIFADDR failed: %d", result
);
2308 else if (protocol
== PF_INET6
) {
2309 struct in6_ifreq ifr6
;
2311 bzero(&ifr6
, sizeof(ifr6
));
2312 snprintf(ifr6
.ifr_name
, sizeof(ifr6
.ifr_name
), "%s%d",
2313 ifnet_name(interface
), ifnet_unit(interface
));
2314 result
= ifaddr_address(address
, (struct sockaddr
*)&ifr6
.ifr_addr
,
2315 sizeof(ifr6
.ifr_addr
));
2317 printf("ipsec_remove_address - ifaddr_address failed (v6): %d",
2321 result
= sock_ioctl(pf_socket
, SIOCDIFADDR_IN6
, &ifr6
);
2323 printf("ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
// ipsec_cleanup_family: detaches one protocol family from an interface,
// removing its addresses first when necessary.
//
// A privileged datagram socket of the given family is created and used for all
// requests. The detach is attempted once; a result of 0 or ENXIO means there is
// nothing left to do. On EBUSY every address of that family is removed with
// ipsec_remove_address() and the detach is attempted a second time. The socket
// and any address list still held are released at the end.
//
// interface - interface to clean up
// protocol  - PF_INET or PF_INET6; anything else is logged as invalid
//
// NOTE(review): the error tests and the jumps to the cleanup code are not
// visible in this listing — confirm.
2331 ipsec_cleanup_family(ifnet_t interface
,
2332 protocol_family_t protocol
)
2335 socket_t pf_socket
= NULL
;
2336 ifaddr_t
*addresses
= NULL
;
2339 if (protocol
!= PF_INET
&& protocol
!= PF_INET6
) {
2340 printf("ipsec_cleanup_family - invalid protocol family %d\n", protocol
);
2344 /* Create a socket for removing addresses and detaching the protocol */
2345 result
= sock_socket(protocol
, SOCK_DGRAM
, 0, NULL
, NULL
, &pf_socket
);
// EAFNOSUPPORT is not logged; any other socket creation result is.
2347 if (result
!= EAFNOSUPPORT
)
2348 printf("ipsec_cleanup_family - failed to create %s socket: %d\n",
2349 protocol
== PF_INET
? "IP" : "IPv6", result
);
2353 /* always set SS_PRIV, we want to close and detach regardless */
2354 sock_setpriv(pf_socket
, 1);
2356 result
= ipsec_detach_ip(interface
, protocol
, pf_socket
);
2357 if (result
== 0 || result
== ENXIO
) {
2358 /* We are done! We either detached or weren't attached. */
2361 else if (result
!= EBUSY
) {
2362 /* Uh, not really sure what happened here... */
2363 printf("ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result
);
2368 * At this point, we received an EBUSY error. This means there are
2369 * addresses attached. We should detach them and then try again.
2371 result
= ifnet_get_address_list_family(interface
, &addresses
, protocol
);
2373 printf("fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2374 ifnet_name(interface
), ifnet_unit(interface
),
2375 protocol
== PF_INET
? "PF_INET" : "PF_INET6", result
);
// The address list ends at the first zero entry.
2379 for (i
= 0; addresses
[i
] != 0; i
++) {
2380 ipsec_remove_address(interface
, protocol
, addresses
[i
], pf_socket
);
2382 ifnet_free_address_list(addresses
);
2386 * The addresses should be gone, we should try the remove again.
2388 result
= ipsec_detach_ip(interface
, protocol
, pf_socket
);
2389 if (result
!= 0 && result
!= ENXIO
) {
2390 printf("ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result
);
// Common cleanup: close the helper socket and free the address list if one is
// still held.
// NOTE(review): whether 'addresses' is reset after the free above (so this
// second free cannot see the same list) is not visible in this listing — confirm.
2394 if (pf_socket
!= NULL
)
2395 sock_close(pf_socket
);
2397 if (addresses
!= NULL
)
2398 ifnet_free_address_list(addresses
);
// ipsec_ctl_disconnect: kernel control disconnect handler; tears down the
// interface that belongs to a control socket.
//
// Visible sequence: stop the netif nexus rings (when a netif nexus exists),
// take the exclusive pcb lock, save and clear the kpipe UUID, clear the control
// reference, then:
//   - netif nexus case: take an I/O reference, detach the ifnet (panics on
//     failure), delete the security policies for the interface, clean up
//     AF_INET and AF_INET6, unlock, free the kpipe instance, detach the nexus
//     state and drop the I/O reference;
//   - other connected case: unlock, free the kpipe instance, delete the
//     security policies, clean up both families and detach the ifnet (failure
//     is only logged);
//   - bound-but-not-connected case: unlock and free the pcb.
//
// kctlref, unit - unused
// NOTE(review): the 'unitinfo' parameter declaration, the conditions that
// select between the three cases, and the return value are not visible in this
// listing — confirm.
2402 ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref
,
2403 __unused u_int32_t unit
,
2406 struct ipsec_pcb
*pcb
= unitinfo
;
2415 // Tell the nexus to stop all rings
2416 if (pcb
->ipsec_netif_nexus
!= NULL
) {
2417 kern_nexus_stop(pcb
->ipsec_netif_nexus
);
2419 #endif // IPSEC_NEXUS
2421 lck_rw_lock_exclusive(&pcb
->ipsec_pcb_lock
);
// Keep a private copy of the kpipe UUID so the instance can be freed after the
// lock has been released.
2425 uuid_copy(kpipe_uuid
, pcb
->ipsec_kpipe_uuid
);
2426 uuid_clear(pcb
->ipsec_kpipe_uuid
);
2427 pcb
->ipsec_kpipe_enabled
= FALSE
;
2428 #endif // IPSEC_NEXUS
2430 pcb
->ipsec_ctlref
= NULL
;
2432 ifp
= pcb
->ipsec_ifp
;
2435 if (pcb
->ipsec_netif_nexus
!= NULL
) {
2437 * Quiesce the interface and flush any pending outbound packets.
2441 /* Increment refcnt, but detach interface */
2442 ifnet_incr_iorefcnt(ifp
);
2443 if ((result
= ifnet_detach(ifp
)) != 0) {
2444 panic("ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result
);
2449 * We want to do everything in our power to ensure that the interface
2450 * really goes away when the socket is closed. We must remove IP/IPv6
2451 * addresses and detach the protocols. Finally, we can remove and
2452 * release the interface.
2454 key_delsp_for_ipsec_if(ifp
);
2456 ipsec_cleanup_family(ifp
, AF_INET
);
2457 ipsec_cleanup_family(ifp
, AF_INET6
);
2459 lck_rw_unlock_exclusive(&pcb
->ipsec_pcb_lock
);
// The kernel pipe nexus reference is only dropped when freeing the instance
// succeeded.
2461 if (!uuid_is_null(kpipe_uuid
)) {
2462 if (kern_nexus_controller_free_provider_instance(ipsec_ncd
, kpipe_uuid
) == 0) {
2463 ipsec_unregister_kernel_pipe_nexus();
2466 ipsec_nexus_detach(&pcb
->ipsec_nx
);
2468 /* Decrement refcnt to finish detaching and freeing */
2469 ifnet_decr_iorefcnt(ifp
);
2471 #endif // IPSEC_NEXUS
2473 lck_rw_unlock_exclusive(&pcb
->ipsec_pcb_lock
);
2476 if (!uuid_is_null(kpipe_uuid
)) {
2477 if (kern_nexus_controller_free_provider_instance(ipsec_ncd
, kpipe_uuid
) == 0) {
2478 ipsec_unregister_kernel_pipe_nexus();
2481 #endif // IPSEC_NEXUS
2484 * We want to do everything in our power to ensure that the interface
2485 * really goes away when the socket is closed. We must remove IP/IPv6
2486 * addresses and detach the protocols. Finally, we can remove and
2487 * release the interface.
2489 key_delsp_for_ipsec_if(ifp
);
2491 ipsec_cleanup_family(ifp
, AF_INET
);
2492 ipsec_cleanup_family(ifp
, AF_INET6
);
2495 * Detach now; ipsec_detach() will be called asynchronously once
2496 * the I/O reference count drops to 0. There we will invoke
2499 if ((result
= ifnet_detach(ifp
)) != 0) {
2500 printf("ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result
);
2504 // Bound, but not connected
2505 lck_rw_unlock_exclusive(&pcb
->ipsec_pcb_lock
);
2506 ipsec_free_pcb(pcb
, false);
// ipsec_ctl_send: kernel control send handler. The three visible parameters
// (kctlref, unit, unitinfo) are marked __unused; per the comment below,
// messages from the control socket are currently not used.
// NOTE(review): the remaining parameters and the function body are not visible
// in this listing — confirm what happens to the message passed in.
2513 ipsec_ctl_send(__unused kern_ctl_ref kctlref
,
2514 __unused u_int32_t unit
,
2515 __unused
void *unitinfo
,
2519 /* Receive messages from the control socket. Currently unused. */
/*
 * Set-option handler for the ipsec control socket.
 *
 * unitinfo is the per-socket struct ipsec_pcb. Each option validates the
 * length of the supplied value first. Most options then check whether the
 * interface exists yet (pcb->ipsec_ifp): some may only be set after
 * connecting, others only before. Options that are not recognized end
 * with result = ENOPROTOOPT.
 */
2525 ipsec_ctl_setopt(__unused kern_ctl_ref kctlref
,
2526 __unused u_int32_t unit
,
2532 struct ipsec_pcb
*pcb
= unitinfo
;
2535 /* check for privileges for privileged options */
/* These four options are gated on the caller's credential being superuser. */
2537 case IPSEC_OPT_FLAGS
:
2538 case IPSEC_OPT_EXT_IFDATA_STATS
:
2539 case IPSEC_OPT_SET_DELEGATE_INTERFACE
:
2540 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS
:
2541 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
/* Flags: value must be exactly a u_int32_t; stored verbatim in the pcb. */
2548 case IPSEC_OPT_FLAGS
:
2549 if (len
!= sizeof(u_int32_t
)) {
2552 pcb
->ipsec_flags
= *(u_int32_t
*)data
;
/* External stats toggle: int-sized value, normalized to 0 or 1. */
2556 case IPSEC_OPT_EXT_IFDATA_STATS
:
2557 if (len
!= sizeof(int)) {
2561 if (pcb
->ipsec_ifp
== NULL
) {
2562 // Only can set after connecting
2566 pcb
->ipsec_ext_ifdata_stats
= (*(int *)data
) ? 1 : 0;
/*
 * Externally supplied counters: the value is a struct ipsec_stats_param.
 * The interface must exist and ipsec_ext_ifdata_stats is checked before
 * the in/out interface counters are incremented.
 */
2569 case IPSEC_OPT_INC_IFDATA_STATS_IN
:
2570 case IPSEC_OPT_INC_IFDATA_STATS_OUT
: {
2571 struct ipsec_stats_param
*utsp
= (struct ipsec_stats_param
*)data
;
2573 if (utsp
== NULL
|| len
< sizeof(struct ipsec_stats_param
)) {
2577 if (pcb
->ipsec_ifp
== NULL
) {
2578 // Only can set after connecting
2582 if (!pcb
->ipsec_ext_ifdata_stats
) {
2586 if (opt
== IPSEC_OPT_INC_IFDATA_STATS_IN
)
2587 ifnet_stat_increment_in(pcb
->ipsec_ifp
, utsp
->utsp_packets
,
2588 utsp
->utsp_bytes
, utsp
->utsp_errors
);
2590 ifnet_stat_increment_out(pcb
->ipsec_ifp
, utsp
->utsp_packets
,
2591 utsp
->utsp_bytes
, utsp
->utsp_errors
);
/*
 * Delegate interface: the value is an interface name of at most
 * IFNAMSIZ - 1 bytes, copied into a local buffer and looked up by name.
 * A zero-length value leaves del_ifp NULL, which removes the delegate.
 */
2595 case IPSEC_OPT_SET_DELEGATE_INTERFACE
: {
2596 ifnet_t del_ifp
= NULL
;
2597 char name
[IFNAMSIZ
];
2599 if (len
> IFNAMSIZ
- 1) {
2603 if (pcb
->ipsec_ifp
== NULL
) {
2604 // Only can set after connecting
2608 if (len
!= 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */
2609 bcopy(data
, name
, len
);
2611 result
= ifnet_find_by_name(name
, &del_ifp
);
2614 printf("%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
2615 __func__
, pcb
->ipsec_ifp
->if_xname
,
2618 result
= ifnet_set_delegate(pcb
->ipsec_ifp
, del_ifp
);
/* Release the del_ifp obtained from the name lookup. */
2620 ifnet_release(del_ifp
);
/*
 * Output traffic class: the int value is mapped with so_tc2msc(); an
 * MBUF_SC_UNSPEC result is replaced by MBUF_SC_OAM.
 */
2625 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS
: {
2626 if (len
!= sizeof(int)) {
2630 if (pcb
->ipsec_ifp
== NULL
) {
2631 // Only can set after connecting
2635 mbuf_svc_class_t output_service_class
= so_tc2msc(*(int *)data
);
2636 if (output_service_class
== MBUF_SC_UNSPEC
) {
2637 pcb
->ipsec_output_service_class
= MBUF_SC_OAM
;
2639 pcb
->ipsec_output_service_class
= output_service_class
;
2641 printf("%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
2642 __func__
, pcb
->ipsec_ifp
->if_xname
,
2643 pcb
->ipsec_output_service_class
);
/* Channel toggle: enables (for the current process) or disables the channel. */
2648 case IPSEC_OPT_ENABLE_CHANNEL
: {
2649 if (len
!= sizeof(int)) {
2653 if (pcb
->ipsec_ifp
== NULL
) {
2654 // Only can set after connecting
2659 result
= ipsec_enable_channel(pcb
, current_proc());
2661 result
= ipsec_disable_channel(pcb
);
/*
 * Flowswitch toggle: checks if_enable_netagent, then adds or deletes the
 * ms_agent network agent on the interface when that UUID is non-null.
 */
2666 case IPSEC_OPT_ENABLE_FLOWSWITCH
: {
2667 if (len
!= sizeof(int)) {
2671 if (pcb
->ipsec_ifp
== NULL
) {
2672 // Only can set after connecting
2676 if (!if_enable_netagent
) {
2681 if (!uuid_is_null(pcb
->ipsec_nx
.ms_agent
)) {
2682 if_add_netagent(pcb
->ipsec_ifp
, pcb
->ipsec_nx
.ms_agent
);
2685 if (!uuid_is_null(pcb
->ipsec_nx
.ms_agent
)) {
2686 if_delete_netagent(pcb
->ipsec_ifp
, pcb
->ipsec_nx
.ms_agent
);
/*
 * Input fragment size: a value no larger than an IPv6 header clears the
 * setting; any larger value is recorded and marked as set.
 */
2692 case IPSEC_OPT_INPUT_FRAG_SIZE
: {
2693 if (len
!= sizeof(u_int32_t
)) {
2697 u_int32_t input_frag_size
= *(u_int32_t
*)data
;
2698 if (input_frag_size
<= sizeof(struct ip6_hdr
)) {
2699 pcb
->ipsec_frag_size_set
= FALSE
;
2700 pcb
->ipsec_input_frag_size
= 0;
2702 printf("SET FRAG SIZE TO %u\n", input_frag_size
);
2703 pcb
->ipsec_frag_size_set
= TRUE
;
2704 pcb
->ipsec_input_frag_size
= input_frag_size
;
/* The options from here through the ring sizes check ipsec_ifp != NULL, i.e. they must be set before connecting. */
2708 case IPSEC_OPT_ENABLE_NETIF
: {
2709 if (len
!= sizeof(int)) {
2713 if (pcb
->ipsec_ifp
!= NULL
) {
2714 // Only can set before connecting
2718 pcb
->ipsec_use_netif
= true;
/* Slot size: checked against IPSEC_IF_MIN_SLOT_SIZE and IPSEC_IF_MAX_SLOT_SIZE. */
2721 case IPSEC_OPT_SLOT_SIZE
: {
2722 if (len
!= sizeof(u_int32_t
)) {
2726 if (pcb
->ipsec_ifp
!= NULL
) {
2727 // Only can set before connecting
2731 u_int32_t slot_size
= *(u_int32_t
*)data
;
2732 if (slot_size
< IPSEC_IF_MIN_SLOT_SIZE
||
2733 slot_size
> IPSEC_IF_MAX_SLOT_SIZE
) {
2736 pcb
->ipsec_slot_size
= slot_size
;
/* The three ring sizes below are each checked against IPSEC_IF_MIN_RING_SIZE and IPSEC_IF_MAX_RING_SIZE. */
2739 case IPSEC_OPT_NETIF_RING_SIZE
: {
2740 if (len
!= sizeof(u_int32_t
)) {
2744 if (pcb
->ipsec_ifp
!= NULL
) {
2745 // Only can set before connecting
2749 u_int32_t ring_size
= *(u_int32_t
*)data
;
2750 if (ring_size
< IPSEC_IF_MIN_RING_SIZE
||
2751 ring_size
> IPSEC_IF_MAX_RING_SIZE
) {
2754 pcb
->ipsec_netif_ring_size
= ring_size
;
2757 case IPSEC_OPT_TX_FSW_RING_SIZE
: {
2758 if (len
!= sizeof(u_int32_t
)) {
2762 if (pcb
->ipsec_ifp
!= NULL
) {
2763 // Only can set before connecting
2767 u_int32_t ring_size
= *(u_int32_t
*)data
;
2768 if (ring_size
< IPSEC_IF_MIN_RING_SIZE
||
2769 ring_size
> IPSEC_IF_MAX_RING_SIZE
) {
2772 pcb
->ipsec_tx_fsw_ring_size
= ring_size
;
2775 case IPSEC_OPT_RX_FSW_RING_SIZE
: {
2776 if (len
!= sizeof(u_int32_t
)) {
2780 if (pcb
->ipsec_ifp
!= NULL
) {
2781 // Only can set before connecting
2785 u_int32_t ring_size
= *(u_int32_t
*)data
;
2786 if (ring_size
< IPSEC_IF_MIN_RING_SIZE
||
2787 ring_size
> IPSEC_IF_MAX_RING_SIZE
) {
2790 pcb
->ipsec_rx_fsw_ring_size
= ring_size
;
2794 #endif // IPSEC_NEXUS
/* Any option not handled above is rejected. */
2797 result
= ENOPROTOOPT
;
/*
 * Get-option handler for the ipsec control socket.
 *
 * unitinfo is the per-socket struct ipsec_pcb. len is an in/out pointer:
 * for fixed-size options *len is compared against the exact size of the
 * value before the value is written to data. Options that are not
 * recognized end with result = ENOPROTOOPT.
 */
2805 ipsec_ctl_getopt(__unused kern_ctl_ref kctlref
,
2806 __unused u_int32_t unit
,
2812 struct ipsec_pcb
*pcb
= unitinfo
;
2816 case IPSEC_OPT_FLAGS
: {
2817 if (*len
!= sizeof(u_int32_t
)) {
2820 *(u_int32_t
*)data
= pcb
->ipsec_flags
;
2825 case IPSEC_OPT_EXT_IFDATA_STATS
: {
2826 if (*len
!= sizeof(int)) {
2829 *(int *)data
= (pcb
->ipsec_ext_ifdata_stats
) ? 1 : 0;
/*
 * Interface name: *len is compared against the stored name length
 * including its terminator (capped at the size of ipsec_if_xname), and
 * the interface must exist. On success *len becomes the snprintf()
 * result plus one for the terminator.
 */
2834 case IPSEC_OPT_IFNAME
: {
2835 if (*len
< MIN(strlen(pcb
->ipsec_if_xname
) + 1, sizeof(pcb
->ipsec_if_xname
))) {
2838 if (pcb
->ipsec_ifp
== NULL
) {
2839 // Only can get after connecting
2843 *len
= snprintf(data
, *len
, "%s", pcb
->ipsec_if_xname
) + 1;
/* Traffic class: the stored mbuf service class is converted back with so_svc2tc(). */
2848 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS
: {
2849 if (*len
!= sizeof(int)) {
2852 *(int *)data
= so_svc2tc(pcb
->ipsec_output_service_class
);
/*
 * Channel UUID: read under the pcb lock held shared. The UUID is copied
 * out only after the null-UUID and *len checks.
 */
2858 case IPSEC_OPT_GET_CHANNEL_UUID
: {
2859 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
2860 if (uuid_is_null(pcb
->ipsec_kpipe_uuid
)) {
2862 } else if (*len
!= sizeof(uuid_t
)) {
2865 uuid_copy(data
, pcb
->ipsec_kpipe_uuid
);
2867 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
/* The remaining options each return one u_int32_t field of the pcb. */
2871 case IPSEC_OPT_INPUT_FRAG_SIZE
: {
2872 if (*len
!= sizeof(u_int32_t
)) {
2875 *(u_int32_t
*)data
= pcb
->ipsec_input_frag_size
;
2879 case IPSEC_OPT_SLOT_SIZE
: {
2880 if (*len
!= sizeof(u_int32_t
)) {
2883 *(u_int32_t
*)data
= pcb
->ipsec_slot_size
;
2887 case IPSEC_OPT_NETIF_RING_SIZE
: {
2888 if (*len
!= sizeof(u_int32_t
)) {
2891 *(u_int32_t
*)data
= pcb
->ipsec_netif_ring_size
;
2895 case IPSEC_OPT_TX_FSW_RING_SIZE
: {
2896 if (*len
!= sizeof(u_int32_t
)) {
2899 *(u_int32_t
*)data
= pcb
->ipsec_tx_fsw_ring_size
;
2903 case IPSEC_OPT_RX_FSW_RING_SIZE
: {
2904 if (*len
!= sizeof(u_int32_t
)) {
2907 *(u_int32_t
*)data
= pcb
->ipsec_rx_fsw_ring_size
;
2912 #endif // IPSEC_NEXUS
/* Any option not handled above is rejected. */
2915 result
= ENOPROTOOPT
;
2923 /* Network Interface functions */
/*
 * Output path for one packet leaving through an ipsec interface.
 *
 * The packet is rejected if NECP reports it last traversed this same
 * interface. It is then marked as coming from this interface and
 * dispatched on the IP version in its first header. Both IP versions follow
 * the same steps: optional BPF tap, IPsec processing via
 * ipsec4_interface_output() or ipsec6_interface_output(), packet-header
 * flow metadata, output statistics, hand-off to ip_output() or
 * ip6_output(), and a check of the flow advisory returned through the
 * output arguments.
 */
2925 ipsec_output(ifnet_t interface
,
2928 struct ipsec_pcb
*pcb
= ifnet_softc(interface
);
2929 struct ipsec_output_state ipsec_state
;
2931 struct route_in6 ro6
;
2934 struct ip6_hdr
*ip6
;
2935 struct ip_out_args ipoa
;
2936 struct ip6_out_args ip6oa
;
2938 u_int ip_version
= 0;
2940 struct flowadv
*adv
= NULL
;
2942 // Make sure this packet isn't looping through the interface
2943 if (necp_get_last_interface_index_from_packet(data
) == interface
->if_index
) {
2945 goto ipsec_output_err
;
2948 // Mark the interface so NECP can evaluate tunnel policy
2949 necp_mark_packet_from_interface(data
, interface
);
/* The version field is read through a struct ip view of the header. */
2951 ip
= mtod(data
, struct ip
*);
2952 ip_version
= ip
->ip_v
;
2954 switch (ip_version
) {
/* IPv4 path: the BPF tap is conditional on the interface not using netif. */
2957 if (!pcb
->ipsec_use_netif
)
2958 #endif // IPSEC_NEXUS
2961 bpf_tap_out(pcb
->ipsec_ifp
, DLT_NULL
, data
, &af
, sizeof(af
));
2964 /* Apply encryption */
2965 memset(&ipsec_state
, 0, sizeof(ipsec_state
));
2966 ipsec_state
.m
= data
;
2967 ipsec_state
.dst
= (struct sockaddr
*)&ip
->ip_dst
;
2968 memset(&ipsec_state
.ro
, 0, sizeof(ipsec_state
.ro
));
2970 error
= ipsec4_interface_output(&ipsec_state
, interface
);
2971 /* Tunneled in IPv6 - packet is gone */
2972 if (error
== 0 && ipsec_state
.tunneled
== 6) {
/* The IPsec layer may have replaced the mbuf; continue with ipsec_state.m. */
2976 data
= ipsec_state
.m
;
2977 if (error
|| data
== NULL
) {
2979 printf("ipsec_output: ipsec4_output error %d.\n", error
);
2981 goto ipsec_output_err
;
2984 /* Set traffic class, set flow */
2985 m_set_service_class(data
, pcb
->ipsec_output_service_class
);
2986 data
->m_pkthdr
.pkt_flowsrc
= FLOWSRC_IFNET
;
2987 data
->m_pkthdr
.pkt_flowid
= interface
->if_flowhash
;
2988 data
->m_pkthdr
.pkt_proto
= ip
->ip_p
;
2989 data
->m_pkthdr
.pkt_flags
= (PKTF_FLOW_ID
| PKTF_FLOW_ADV
| PKTF_FLOW_LOCALSRC
);
2991 /* Flip endian-ness for ip_output */
2992 ip
= mtod(data
, struct ip
*);
2996 /* Increment statistics */
2997 length
= mbuf_pkthdr_len(data
);
2998 ifnet_stat_increment_out(interface
, 1, length
, 0);
3000 /* Send to ip_output */
3001 memset(&ro
, 0, sizeof(ro
));
3003 flags
= (IP_OUTARGS
| /* Passing out args to specify interface */
3004 IP_NOIPSEC
); /* To ensure the packet doesn't go through ipsec twice */
3006 memset(&ipoa
, 0, sizeof(ipoa
));
3007 ipoa
.ipoa_flowadv
.code
= 0;
3008 ipoa
.ipoa_flags
= IPOAF_SELECT_SRCIF
| IPOAF_BOUND_SRCADDR
;
/* Bind to the outgoing interface chosen by the IPsec layer, if it set one. */
3009 if (ipsec_state
.outgoing_if
) {
3010 ipoa
.ipoa_boundif
= ipsec_state
.outgoing_if
;
3011 ipoa
.ipoa_flags
|= IPOAF_BOUND_IF
;
3013 ipsec_set_ipoa_for_interface(pcb
->ipsec_ifp
, &ipoa
);
3015 adv
= &ipoa
.ipoa_flowadv
;
3017 (void)ip_output(data
, NULL
, &ro
, flags
, NULL
, &ipoa
);
/* A flow-controlled or suspended advisory disables output on this interface. */
3020 if (adv
->code
== FADV_FLOW_CONTROLLED
|| adv
->code
== FADV_SUSPENDED
) {
3022 ifnet_disable_output(interface
);
/* IPv6 path: mirrors the IPv4 steps above. */
3029 if (!pcb
->ipsec_use_netif
)
3030 #endif // IPSEC_NEXUS
3033 bpf_tap_out(pcb
->ipsec_ifp
, DLT_NULL
, data
, &af
, sizeof(af
));
3036 data
= ipsec6_splithdr(data
);
3038 printf("ipsec_output: ipsec6_splithdr returned NULL\n");
3039 goto ipsec_output_err
;
3042 ip6
= mtod(data
, struct ip6_hdr
*);
3044 memset(&ipsec_state
, 0, sizeof(ipsec_state
));
3045 ipsec_state
.m
= data
;
3046 ipsec_state
.dst
= (struct sockaddr
*)&ip6
->ip6_dst
;
3047 memset(&ipsec_state
.ro
, 0, sizeof(ipsec_state
.ro
));
3049 error
= ipsec6_interface_output(&ipsec_state
, interface
, &ip6
->ip6_nxt
, ipsec_state
.m
);
3050 if (error
== 0 && ipsec_state
.tunneled
== 4) { /* tunneled in IPv4 - packet is gone */
3053 data
= ipsec_state
.m
;
3054 if (error
|| data
== NULL
) {
3056 printf("ipsec_output: ipsec6_output error %d\n", error
);
3058 goto ipsec_output_err
;
3061 /* Set traffic class, set flow */
3062 m_set_service_class(data
, pcb
->ipsec_output_service_class
);
3063 data
->m_pkthdr
.pkt_flowsrc
= FLOWSRC_IFNET
;
3064 data
->m_pkthdr
.pkt_flowid
= interface
->if_flowhash
;
3065 data
->m_pkthdr
.pkt_proto
= ip6
->ip6_nxt
;
3066 data
->m_pkthdr
.pkt_flags
= (PKTF_FLOW_ID
| PKTF_FLOW_ADV
| PKTF_FLOW_LOCALSRC
);
3068 /* Increment statistics */
3069 length
= mbuf_pkthdr_len(data
);
3070 ifnet_stat_increment_out(interface
, 1, length
, 0);
3072 /* Send to ip6_output */
3073 memset(&ro6
, 0, sizeof(ro6
));
3075 flags
= IPV6_OUTARGS
;
3077 memset(&ip6oa
, 0, sizeof(ip6oa
));
3078 ip6oa
.ip6oa_flowadv
.code
= 0;
3079 ip6oa
.ip6oa_flags
= IP6OAF_SELECT_SRCIF
| IP6OAF_BOUND_SRCADDR
;
3080 if (ipsec_state
.outgoing_if
) {
3081 ip6oa
.ip6oa_boundif
= ipsec_state
.outgoing_if
;
3082 ip6oa
.ip6oa_flags
|= IP6OAF_BOUND_IF
;
3084 ipsec_set_ip6oa_for_interface(pcb
->ipsec_ifp
, &ip6oa
);
3086 adv
= &ip6oa
.ip6oa_flowadv
;
3088 (void) ip6_output(data
, NULL
, &ro6
, flags
, NULL
, NULL
, &ip6oa
);
3091 if (adv
->code
== FADV_FLOW_CONTROLLED
|| adv
->code
== FADV_SUSPENDED
) {
3093 ifnet_disable_output(interface
);
/* Any other version value is logged and routed to the error label. */
3099 printf("ipsec_output: Received unknown packet version %d.\n", ip_version
);
3101 goto ipsec_output_err
;
/*
 * Start routine for the interface: takes packets from the interface
 * output queue with ifnet_dequeue() and passes each to ipsec_output().
 * A non-zero return from either call is tested. The interface softc
 * (struct ipsec_pcb) must be non-NULL, which is enforced with VERIFY.
 */
3115 ipsec_start(ifnet_t interface
)
3118 struct ipsec_pcb
*pcb
= ifnet_softc(interface
);
3120 VERIFY(pcb
!= NULL
);
3122 if (ifnet_dequeue(interface
, &data
) != 0)
3124 if (ipsec_output(interface
, data
) != 0)
3129 /* Network Interface functions */
/*
 * Demux routine: derives the protocol family of an inbound packet from
 * the IP version field of its first non-empty mbuf and stores it in
 * *protocol (PF_INET or PF_INET6). interface and frame_header are unused.
 */
3131 ipsec_demux(__unused ifnet_t interface
,
3133 __unused
char *frame_header
,
3134 protocol_family_t
*protocol
)
/* Skip leading mbufs that hold no data. */
3139 while (data
!= NULL
&& mbuf_len(data
) < 1) {
3140 data
= mbuf_next(data
);
3146 ip
= mtod(data
, struct ip
*);
3147 ip_version
= ip
->ip_v
;
3149 switch(ip_version
) {
3151 *protocol
= PF_INET
;
3154 *protocol
= PF_INET6
;
/*
 * Add-protocol routine for the interface. Of its four parameters only
 * protocol is not marked __unused; interface, demux_array and
 * demux_count are ignored.
 */
3164 ipsec_add_proto(__unused ifnet_t interface
,
3165 protocol_family_t protocol
,
3166 __unused
const struct ifnet_demux_desc
*demux_array
,
3167 __unused u_int32_t demux_count
)
/*
 * Delete-protocol routine for the interface. Both parameters are marked
 * __unused.
 */
3182 ipsec_del_proto(__unused ifnet_t interface
,
3183 __unused protocol_family_t protocol
)
/*
 * ioctl handler for the interface.
 *
 * For an MTU change, data is interpreted as a struct ifreq. When the
 * interface uses netif, the requested MTU is first compared against the
 * pcb's slot size so that packets fit in the channel buffers. Requests
 * that fall through to the final case end with result = EOPNOTSUPP.
 */
3189 ipsec_ioctl(ifnet_t interface
,
3193 struct ipsec_pcb
*pcb
= ifnet_softc(interface
);
3199 if (pcb
->ipsec_use_netif
) {
3200 // Make sure we can fit packets in the channel buffers
3201 if (((uint64_t)((struct ifreq
*)data
)->ifr_mtu
) > pcb
->ipsec_slot_size
) {
3204 ifnet_set_mtu(interface
, (uint32_t)((struct ifreq
*)data
)->ifr_mtu
);
3207 #endif // IPSEC_NEXUS
3209 ifnet_set_mtu(interface
, ((struct ifreq
*)data
)->ifr_mtu
);
3215 /* ifioctl() takes care of it */
3219 result
= EOPNOTSUPP
;
/*
 * Detach completion for the interface: fetches the pcb from the
 * interface softc, drops a reference on the interface (the return value
 * is deliberately ignored), and frees the pcb with ipsec_free_pcb(),
 * passing true as its second argument.
 */
3226 ipsec_detached(ifnet_t interface
)
3228 struct ipsec_pcb
*pcb
= ifnet_softc(interface
);
3229 (void)ifnet_release(interface
);
3230 ipsec_free_pcb(pcb
, true);
3233 /* Protocol Handlers */
/*
 * Protocol input handler: delivers an inbound packet to the stack.
 *
 * The packet's receive interface is set to this interface. When the
 * interface does not use netif, the packet's IP version is inspected and
 * the packet is tapped to BPF. It is then reported to pktap and handed to
 * proto_input(). The interface input counters record one error when
 * proto_input() fails; the other update records one packet and its
 * length. frame_header is unused.
 */
3236 ipsec_proto_input(ifnet_t interface
,
3237 protocol_family_t protocol
,
3239 __unused
char *frame_header
)
3241 mbuf_pkthdr_setrcvif(m
, interface
);
3244 struct ipsec_pcb
*pcb
= ifnet_softc(interface
);
3245 if (!pcb
->ipsec_use_netif
)
3246 #endif // IPSEC_NEXUS
3249 struct ip
*ip
= mtod(m
, struct ip
*);
3250 if (ip
->ip_v
== 4) {
3252 } else if (ip
->ip_v
== 6) {
3255 bpf_tap_in(interface
, DLT_NULL
, m
, &af
, sizeof(af
));
3257 pktap_input(interface
, protocol
, m
, NULL
);
3259 if (proto_input(protocol
, m
) != 0) {
3260 ifnet_stat_increment_in(interface
, 0, 0, 1);
/* NOTE(review): this reads m->m_pkthdr.len after m was passed to proto_input() - confirm m is still valid here. */
3263 ifnet_stat_increment_in(interface
, 1, m
->m_pkthdr
.len
, 0);
/*
 * Protocol pre-output handler: records the protocol family of the
 * outgoing packet by writing it into the frame_type buffer. All other
 * parameters are unused.
 *
 * NOTE(review): frame_type is declared __unused but is written through
 * below; the attribute is misleading and could be dropped.
 */
3270 ipsec_proto_pre_output(__unused ifnet_t interface
,
3271 protocol_family_t protocol
,
3272 __unused mbuf_t
*packet
,
3273 __unused
const struct sockaddr
*dest
,
3274 __unused
void *route
,
3275 __unused
char *frame_type
,
3276 __unused
char *link_layer_dest
)
/* The frame_type buffer is treated as storage for a protocol_family_t. */
3279 *(protocol_family_t
*)(void *)frame_type
= protocol
;
/*
 * Attaches a protocol family to the interface, using ipsec_proto_input
 * and ipsec_proto_pre_output as its input and pre-output handlers. A
 * failure other than EEXIST is logged.
 *
 * NOTE(review): the log message names "ipsec_attach_inet" rather than
 * this function.
 */
3284 ipsec_attach_proto(ifnet_t interface
,
3285 protocol_family_t protocol
)
3287 struct ifnet_attach_proto_param proto
;
/* Zero the parameter block so every handler not set below is NULL. */
3290 bzero(&proto
, sizeof(proto
));
3291 proto
.input
= ipsec_proto_input
;
3292 proto
.pre_output
= ipsec_proto_pre_output
;
3294 result
= ifnet_attach_protocol(interface
, protocol
, &proto
);
3295 if (result
!= 0 && result
!= EEXIST
) {
3296 printf("ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
/*
 * Injects an inbound packet (or a chain linked through m_nextpkt) into
 * the interface.
 *
 * When the interface uses netif, the chain is appended to the pcb's
 * input chain and the netif RX ring, if present, is notified. Otherwise
 * the protocol family is determined with ipsec_demux() and the packet is
 * passed directly to ipsec_proto_input().
 */
3304 ipsec_inject_inbound_packet(ifnet_t interface
,
3307 struct ipsec_pcb
*pcb
= ifnet_softc(interface
);
3310 if (pcb
->ipsec_use_netif
) {
/* The pcb lock is held shared; the input-chain mutex guards the chain itself. */
3311 lck_rw_lock_shared(&pcb
->ipsec_pcb_lock
);
3313 lck_mtx_lock(&pcb
->ipsec_input_chain_lock
);
3314 if (pcb
->ipsec_input_chain
!= NULL
) {
3315 pcb
->ipsec_input_chain_last
->m_nextpkt
= packet
;
3317 pcb
->ipsec_input_chain
= packet
;
/* Walk to the end of the injected chain so the tail pointer stays accurate. */
3319 while (packet
->m_nextpkt
) {
3320 VERIFY(packet
!= packet
->m_nextpkt
);
3321 packet
= packet
->m_nextpkt
;
3323 pcb
->ipsec_input_chain_last
= packet
;
3324 lck_mtx_unlock(&pcb
->ipsec_input_chain_lock
);
/* Read the ring pointer while the pcb lock is held; notify after the lock is released. */
3326 kern_channel_ring_t rx_ring
= pcb
->ipsec_netif_rxring
;
3327 lck_rw_unlock_shared(&pcb
->ipsec_pcb_lock
);
3329 if (rx_ring
!= NULL
) {
3330 kern_channel_notify(rx_ring
, 0);
3335 #endif // IPSEC_NEXUS
3338 protocol_family_t protocol
;
3339 if ((error
= ipsec_demux(interface
, packet
, NULL
, &protocol
)) != 0) {
3343 return ipsec_proto_input(interface
, protocol
, packet
, NULL
);
/*
 * Stamps a packet's header with the flow metadata of an ipsec interface.
 *
 *  interface  ipsec interface; its softc supplies the output service
 *             class and its if_flowhash becomes the packet's flow id.
 *  packet     mbuf to update.
 *  family     AF_INET or AF_INET6; selects which header the transport
 *             protocol is read from. For any other value pkt_proto is
 *             left untouched.
 *
 * Nothing is done unless both packet and interface are non-NULL.
 */
3348 ipsec_set_pkthdr_for_interface(ifnet_t interface
, mbuf_t packet
, int family
)
3350 if (packet
!= NULL
&& interface
!= NULL
) {
3351 struct ipsec_pcb
*pcb
= ifnet_softc(interface
);
3353 /* Set traffic class, set flow */
3354 m_set_service_class(packet
, pcb
->ipsec_output_service_class
);
3355 packet
->m_pkthdr
.pkt_flowsrc
= FLOWSRC_IFNET
;
3356 packet
->m_pkthdr
.pkt_flowid
= interface
->if_flowhash
;
3357 if (family
== AF_INET
) {
3358 struct ip
*ip
= mtod(packet
, struct ip
*);
3359 packet
->m_pkthdr
.pkt_proto
= ip
->ip_p
;
3360 } else if (family
== AF_INET6
) {
3361 struct ip6_hdr
*ip6
= mtod(packet
, struct ip6_hdr
*);
3362 packet
->m_pkthdr
.pkt_proto
= ip6
->ip6_nxt
;
/* pkt_flags is assigned, not OR-ed: any flags previously set on the packet are replaced. */
3364 packet
->m_pkthdr
.pkt_flags
= (PKTF_FLOW_ID
| PKTF_FLOW_ADV
| PKTF_FLOW_LOCALSRC
);
3370 ipsec_set_ipoa_for_interface(ifnet_t interface
, struct ip_out_args
*ipoa
)
3372 struct ipsec_pcb
*pcb
;
3374 if (interface
== NULL
|| ipoa
== NULL
)
3376 pcb
= ifnet_softc(interface
);
3378 if (net_qos_policy_restricted
== 0) {
3379 ipoa
->ipoa_flags
|= IPOAF_QOSMARKING_ALLOWED
;
3380 ipoa
->ipoa_sotc
= so_svc2tc(pcb
->ipsec_output_service_class
);
3381 } else if (pcb
->ipsec_output_service_class
!= MBUF_SC_VO
||
3382 net_qos_policy_restrict_avapps
!= 0) {
3383 ipoa
->ipoa_flags
&= ~IPOAF_QOSMARKING_ALLOWED
;
3385 ipoa
->ipoa_flags
|= IP6OAF_QOSMARKING_ALLOWED
;
3386 ipoa
->ipoa_sotc
= SO_TC_VO
;
3391 ipsec_set_ip6oa_for_interface(ifnet_t interface
, struct ip6_out_args
*ip6oa
)
3393 struct ipsec_pcb
*pcb
;
3395 if (interface
== NULL
|| ip6oa
== NULL
)
3397 pcb
= ifnet_softc(interface
);
3399 if (net_qos_policy_restricted
== 0) {
3400 ip6oa
->ip6oa_flags
|= IPOAF_QOSMARKING_ALLOWED
;
3401 ip6oa
->ip6oa_sotc
= so_svc2tc(pcb
->ipsec_output_service_class
);
3402 } else if (pcb
->ipsec_output_service_class
!= MBUF_SC_VO
||
3403 net_qos_policy_restrict_avapps
!= 0) {
3404 ip6oa
->ip6oa_flags
&= ~IPOAF_QOSMARKING_ALLOWED
;
3406 ip6oa
->ip6oa_flags
|= IP6OAF_QOSMARKING_ALLOWED
;
3407 ip6oa
->ip6oa_sotc
= SO_TC_VO
;