[apple/xnu.git] / bsd / net / if_ipsec.c (xnu-4570.71.2)
1/*
2 * Copyright (c) 2012-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/systm.h>
30#include <sys/kern_control.h>
31#include <net/kpi_protocol.h>
32#include <net/kpi_interface.h>
33#include <sys/socket.h>
34#include <sys/socketvar.h>
35#include <net/if.h>
36#include <net/if_types.h>
37#include <net/bpf.h>
38#include <net/if_ipsec.h>
39#include <sys/mbuf.h>
40#include <sys/sockio.h>
41#include <netinet/in.h>
42#include <netinet/ip6.h>
43#include <netinet6/in6_var.h>
44#include <netinet6/ip6_var.h>
45#include <sys/kauth.h>
46#include <netinet6/ipsec.h>
47#include <netinet6/ipsec6.h>
48#include <netinet6/esp.h>
49#include <netinet6/esp6.h>
50#include <netinet/ip.h>
51#include <net/flowadv.h>
52#include <net/necp.h>
53#include <netkey/key.h>
54#include <net/pktap.h>
55#include <kern/zalloc.h>
56
57#define IPSEC_NEXUS 0
58
59extern int net_qos_policy_restricted;
60extern int net_qos_policy_restrict_avapps;
61
62/* Kernel Control functions */
63static errno_t ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
64 void **unitinfo);
65static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
66 void **unitinfo);
67static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
68 void *unitinfo);
69static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
70 void *unitinfo, mbuf_t m, int flags);
71static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
72 int opt, void *data, size_t *len);
73static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
74 int opt, void *data, size_t len);
75
76/* Network Interface functions */
77static void ipsec_start(ifnet_t interface);
78static errno_t ipsec_output(ifnet_t interface, mbuf_t data);
79static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
80 protocol_family_t *protocol);
81static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
82 const struct ifnet_demux_desc *demux_array,
83 u_int32_t demux_count);
84static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
85static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
86static void ipsec_detached(ifnet_t interface);
87
88/* Protocol handlers */
89static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
90static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
91 mbuf_t m, char *frame_header);
92static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
93 mbuf_t *packet, const struct sockaddr *dest, void *route,
94 char *frame_type, char *link_layer_dest);
95
96static kern_ctl_ref ipsec_kctlref;
97static u_int32_t ipsec_family;
98static lck_attr_t *ipsec_lck_attr;
99static lck_grp_attr_t *ipsec_lck_grp_attr;
100static lck_grp_t *ipsec_lck_grp;
101static lck_mtx_t ipsec_lock;
102
103#if IPSEC_NEXUS
104
105SYSCTL_DECL(_net_ipsec);
106SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
107static int if_ipsec_verify_interface_creation = 0;
108SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");
109
110#define IPSEC_IF_VERIFY(_e) if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }
111
112#define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
113#define IPSEC_IF_DEFAULT_RING_SIZE 64
114#define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
115#define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
116#define IPSEC_IF_DEFAULT_BUF_SEG_SIZE skmem_usr_buf_seg_size
117
118#define IPSEC_IF_MIN_RING_SIZE 16
119#define IPSEC_IF_MAX_RING_SIZE 1024
120
121#define IPSEC_IF_MIN_SLOT_SIZE 1024
122#define IPSEC_IF_MAX_SLOT_SIZE 4096
123
124static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
125static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
126static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
127
128static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
129static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
130static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
131
132SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
133 &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
134SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
135 &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
136SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
137 &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");
138
139static errno_t
140ipsec_register_nexus(void);
141
142typedef struct ipsec_nx {
143 uuid_t if_provider;
144 uuid_t if_instance;
145 uuid_t ms_provider;
146 uuid_t ms_instance;
147 uuid_t ms_device;
148 uuid_t ms_host;
149 uuid_t ms_agent;
150} *ipsec_nx_t;
151
152static nexus_controller_t ipsec_ncd;
153static int ipsec_ncd_refcount;
154static uuid_t ipsec_kpipe_uuid;
155
156#endif // IPSEC_NEXUS
157
158/* Control block allocated for each kernel control connection */
159struct ipsec_pcb {
160 TAILQ_ENTRY(ipsec_pcb) ipsec_chain;
161 kern_ctl_ref ipsec_ctlref;
162 ifnet_t ipsec_ifp;
163 u_int32_t ipsec_unit;
164 u_int32_t ipsec_unique_id;
165 u_int32_t ipsec_flags;
166 u_int32_t ipsec_input_frag_size;
167 bool ipsec_frag_size_set;
168 int ipsec_ext_ifdata_stats;
169 mbuf_svc_class_t ipsec_output_service_class;
170 char ipsec_if_xname[IFXNAMSIZ];
171 char ipsec_unique_name[IFXNAMSIZ];
172 // PCB lock protects state fields, like ipsec_kpipe_enabled
173 decl_lck_rw_data(, ipsec_pcb_lock);
174
175#if IPSEC_NEXUS
176 // Input chain lock protects the list of input mbufs
177 // The input chain lock must be taken AFTER the PCB lock if both are held
178 lck_mtx_t ipsec_input_chain_lock;
179 struct mbuf * ipsec_input_chain;
180 struct mbuf * ipsec_input_chain_last;
181 struct ipsec_nx ipsec_nx;
182 int ipsec_kpipe_enabled;
183 uuid_t ipsec_kpipe_uuid;
184 void * ipsec_kpipe_rxring;
185 void * ipsec_kpipe_txring;
186 kern_pbufpool_t ipsec_kpipe_pp;
187
188 kern_nexus_t ipsec_netif_nexus;
189 kern_pbufpool_t ipsec_netif_pp;
190 void * ipsec_netif_rxring;
191 void * ipsec_netif_txring;
192 uint64_t ipsec_netif_txring_size;
193
194 u_int32_t ipsec_slot_size;
195 u_int32_t ipsec_netif_ring_size;
196 u_int32_t ipsec_tx_fsw_ring_size;
197 u_int32_t ipsec_rx_fsw_ring_size;
198 bool ipsec_use_netif;
199 bool ipsec_needs_netagent;
200#endif // IPSEC_NEXUS
201};
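/*
 * Illustrative sketch (not part of the original source): when both PCB and
 * input chain locks are needed, the ordering documented above is PCB lock
 * first, then the input chain lock, mirroring ipsec_netif_sync_rx() below:
 *
 *	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
 *	lck_mtx_lock(&pcb->ipsec_input_chain_lock);
 *	mbuf_t data = pcb->ipsec_input_chain;	// consume or append packets here
 *	lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
 *	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
 */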
202
203TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;
204
205#define IPSEC_PCB_ZONE_MAX 32
206#define IPSEC_PCB_ZONE_NAME "net.if_ipsec"
207
208static unsigned int ipsec_pcb_size; /* size of zone element */
209static struct zone *ipsec_pcb_zone; /* zone for ipsec_pcb */
210
211#define IPSECQ_MAXLEN 256
212
213#if IPSEC_NEXUS
214static int
215sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
216{
217#pragma unused(arg1, arg2)
218 int value = if_ipsec_ring_size;
219
220 int error = sysctl_handle_int(oidp, &value, 0, req);
221 if (error || !req->newptr) {
222 return (error);
223 }
224
225 if (value < IPSEC_IF_MIN_RING_SIZE ||
226 value > IPSEC_IF_MAX_RING_SIZE) {
227 return (EINVAL);
228 }
229
230 if_ipsec_ring_size = value;
231
232 return (0);
233}
234
235static int
236sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
237{
238#pragma unused(arg1, arg2)
239 int value = if_ipsec_tx_fsw_ring_size;
240
241 int error = sysctl_handle_int(oidp, &value, 0, req);
242 if (error || !req->newptr) {
243 return (error);
244 }
245
246 if (value < IPSEC_IF_MIN_RING_SIZE ||
247 value > IPSEC_IF_MAX_RING_SIZE) {
248 return (EINVAL);
249 }
250
251 if_ipsec_tx_fsw_ring_size = value;
252
253 return (0);
254}
255
256static int
257sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
258{
259#pragma unused(arg1, arg2)
260 int value = if_ipsec_rx_fsw_ring_size;
261
262 int error = sysctl_handle_int(oidp, &value, 0, req);
263 if (error || !req->newptr) {
264 return (error);
265 }
266
267 if (value < IPSEC_IF_MIN_RING_SIZE ||
268 value > IPSEC_IF_MAX_RING_SIZE) {
269 return (EINVAL);
270 }
271
272 if_ipsec_rx_fsw_ring_size = value;
273
274 return (0);
275}
276#endif // IPSEC_NEXUS
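/*
 * Illustrative sketch (hypothetical userspace caller; assumes a kernel built
 * with IPSEC_NEXUS enabled, which is not the case when IPSEC_NEXUS is 0 as
 * above): the ring-size knobs surface as net.ipsec.* sysctls and can be tuned
 * with sysctlbyname(3). Values outside
 * [IPSEC_IF_MIN_RING_SIZE, IPSEC_IF_MAX_RING_SIZE] are rejected with EINVAL
 * by the handlers above.
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	print_and_bump_ring_size(void)
 *	{
 *		int ring_size = 0;
 *		size_t len = sizeof(ring_size);
 *		if (sysctlbyname("net.ipsec.ring_size", &ring_size, &len, NULL, 0) != 0)
 *			return -1;
 *		printf("net.ipsec.ring_size = %d\n", ring_size);
 *		int new_size = 128;	// must stay within the min/max bounds above
 *		return sysctlbyname("net.ipsec.ring_size", NULL, NULL,
 *		    &new_size, sizeof(new_size));
 *	}
 */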
277
278errno_t
279ipsec_register_control(void)
280{
281 struct kern_ctl_reg kern_ctl;
282 errno_t result = 0;
283
284 /* Find a unique value for our interface family */
285 result = mbuf_tag_id_find(IPSEC_CONTROL_NAME, &ipsec_family);
286 if (result != 0) {
287 printf("ipsec_register_control - mbuf_tag_id_find failed: %d\n", result);
288 return result;
289 }
290
291 ipsec_pcb_size = sizeof(struct ipsec_pcb);
292 ipsec_pcb_zone = zinit(ipsec_pcb_size,
293 IPSEC_PCB_ZONE_MAX * ipsec_pcb_size,
294 0, IPSEC_PCB_ZONE_NAME);
295 if (ipsec_pcb_zone == NULL) {
296 printf("ipsec_register_control - zinit(ipsec_pcb) failed\n");
297 return ENOMEM;
298 }
299
300#if IPSEC_NEXUS
301 ipsec_register_nexus();
302#endif // IPSEC_NEXUS
303
304 TAILQ_INIT(&ipsec_head);
305
306 bzero(&kern_ctl, sizeof(kern_ctl));
307 strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
308 kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
309 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; /* Require root */
310 kern_ctl.ctl_sendsize = 64 * 1024;
311 kern_ctl.ctl_recvsize = 64 * 1024;
312 kern_ctl.ctl_bind = ipsec_ctl_bind;
313 kern_ctl.ctl_connect = ipsec_ctl_connect;
314 kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
315 kern_ctl.ctl_send = ipsec_ctl_send;
316 kern_ctl.ctl_setopt = ipsec_ctl_setopt;
317 kern_ctl.ctl_getopt = ipsec_ctl_getopt;
318
319 result = ctl_register(&kern_ctl, &ipsec_kctlref);
320 if (result != 0) {
321 printf("ipsec_register_control - ctl_register failed: %d\n", result);
322 return result;
323 }
324
325 /* Register the protocol plumbers */
326 if ((result = proto_register_plumber(PF_INET, ipsec_family,
327 ipsec_attach_proto, NULL)) != 0) {
328 printf("ipsec_register_control - proto_register_plumber(PF_INET, %d) failed: %d\n",
329 ipsec_family, result);
330 ctl_deregister(ipsec_kctlref);
331 return result;
332 }
333
334 /* Register the protocol plumbers */
335 if ((result = proto_register_plumber(PF_INET6, ipsec_family,
336 ipsec_attach_proto, NULL)) != 0) {
337 proto_unregister_plumber(PF_INET, ipsec_family);
338 ctl_deregister(ipsec_kctlref);
339 printf("ipsec_register_control - proto_register_plumber(PF_INET6, %d) failed: %d\n",
340 ipsec_family, result);
341 return result;
342 }
343
344 ipsec_lck_attr = lck_attr_alloc_init();
345 ipsec_lck_grp_attr = lck_grp_attr_alloc_init();
346 ipsec_lck_grp = lck_grp_alloc_init("ipsec", ipsec_lck_grp_attr);
347 lck_mtx_init(&ipsec_lock, ipsec_lck_grp, ipsec_lck_attr);
348
349 return 0;
350}
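/*
 * Illustrative sketch (hypothetical userspace client, not part of this file):
 * the kernel control registered above is reached through a
 * PF_SYSTEM/SYSPROTO_CONTROL socket, looking the control up by
 * IPSEC_CONTROL_NAME (from net/if_ipsec.h). Because CTL_FLAG_PRIVILEGED is
 * set, the caller must be root.
 *
 *	#include <sys/socket.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/sys_domain.h>
 *	#include <sys/kern_control.h>
 *	#include <string.h>
 *
 *	int
 *	ipsec_ctl_open(void)
 *	{
 *		int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *		if (fd < 0)
 *			return -1;
 *
 *		struct ctl_info info;
 *		memset(&info, 0, sizeof(info));
 *		strlcpy(info.ctl_name, IPSEC_CONTROL_NAME, sizeof(info.ctl_name));
 *		if (ioctl(fd, CTLIOCGINFO, &info) == -1)
 *			return -1;
 *
 *		struct sockaddr_ctl addr;
 *		memset(&addr, 0, sizeof(addr));
 *		addr.sc_len = sizeof(addr);
 *		addr.sc_family = AF_SYSTEM;
 *		addr.ss_sysaddr = AF_SYS_CONTROL;
 *		addr.sc_id = info.ctl_id;
 *		addr.sc_unit = 0;	// desired unit; 0 asks the control to pick one
 *		if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1)
 *			return -1;
 *		return fd;
 *	}
 */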
351
352/* Helpers */
353int
354ipsec_interface_isvalid(ifnet_t interface)
355{
356 struct ipsec_pcb *pcb = NULL;
357
358 if (interface == NULL)
359 return 0;
360
361 pcb = ifnet_softc(interface);
362
363 if (pcb == NULL)
364 return 0;
365
366 /* When ctl disconnects, ipsec_unit is set to 0 */
367 if (pcb->ipsec_unit == 0)
368 return 0;
369
370 return 1;
371}
372
373boolean_t
374ipsec_interface_needs_netagent(ifnet_t interface)
375{
376 struct ipsec_pcb *pcb = NULL;
377
378 if (interface == NULL) {
379 return (FALSE);
380 }
381
382 pcb = ifnet_softc(interface);
383
384 if (pcb == NULL) {
385 return (FALSE);
386 }
387
388 return (pcb->ipsec_needs_netagent == true);
389}
390
391static errno_t
392ipsec_ifnet_set_attrs(ifnet_t ifp)
393{
394 /* Set flags and additional information. */
395 ifnet_set_mtu(ifp, 1500);
396 ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
397
398 /* The interface must generate its own IPv6 link-local address,
399 * if possible following the RFC 2472 recommendation of a 64-bit interface ID.
400 */
401 ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
402
403#if !IPSEC_NEXUS
404 /* Reset the stats in case the interface has been recycled */
405 struct ifnet_stats_param stats;
406 bzero(&stats, sizeof(struct ifnet_stats_param));
407 ifnet_set_stat(ifp, &stats);
408#endif // !IPSEC_NEXUS
409
410 return (0);
411}
412
413#if IPSEC_NEXUS
414
415static uuid_t ipsec_nx_dom_prov;
416
417static errno_t
418ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
419{
420 return 0;
421}
422
423static void
424ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
425{
426 // Ignore
427}
428
429static errno_t
430ipsec_register_nexus(void)
431{
432 const struct kern_nexus_domain_provider_init dp_init = {
433 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
434 .nxdpi_flags = 0,
435 .nxdpi_init = ipsec_nxdp_init,
436 .nxdpi_fini = ipsec_nxdp_fini
437 };
438 errno_t err = 0;
439
440 /* ipsec_nxdp_init() is called before this function returns */
441 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
442 (const uint8_t *) "com.apple.ipsec",
443 &dp_init, sizeof(dp_init),
444 &ipsec_nx_dom_prov);
445 if (err != 0) {
446 printf("%s: failed to register domain provider\n", __func__);
447 return (err);
448 }
449 return (0);
450}
451
452static errno_t
453ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
454{
455 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
456 pcb->ipsec_netif_nexus = nexus;
457 return (ipsec_ifnet_set_attrs(ifp));
458}
459
460static errno_t
461ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
462 proc_t p, kern_nexus_t nexus,
463 nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
464{
465#pragma unused(nxprov, p)
466#pragma unused(nexus, nexus_port, channel, ch_ctx)
467 return (0);
468}
469
470static errno_t
471ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
472 kern_channel_t channel)
473{
474#pragma unused(nxprov, channel)
475 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
476 boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
477 return (ok ? 0 : ENXIO);
478}
479
480static void
481ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
482 kern_channel_t channel)
483{
484#pragma unused(nxprov, nexus, channel)
485}
486
487static void
488ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
489 kern_channel_t channel)
490{
491#pragma unused(nxprov, nexus, channel)
492}
493
494static void
495ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
496 kern_channel_t channel)
497{
498#pragma unused(nxprov, channel)
499 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
500 if (pcb->ipsec_netif_nexus == nexus) {
501 pcb->ipsec_netif_nexus = NULL;
502 }
503 ifnet_decr_iorefcnt(pcb->ipsec_ifp);
504}
505
506static errno_t
507ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
508 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
509 void **ring_ctx)
510{
511#pragma unused(nxprov)
512#pragma unused(channel)
513#pragma unused(ring_ctx)
514 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
515 if (!is_tx_ring) {
516 VERIFY(pcb->ipsec_kpipe_rxring == NULL);
517 pcb->ipsec_kpipe_rxring = ring;
518 } else {
519 VERIFY(pcb->ipsec_kpipe_txring == NULL);
520 pcb->ipsec_kpipe_txring = ring;
521 }
522 return 0;
523}
524
525static void
526ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
527 kern_channel_ring_t ring)
528{
529#pragma unused(nxprov)
530 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
531 if (pcb->ipsec_kpipe_rxring == ring) {
532 pcb->ipsec_kpipe_rxring = NULL;
533 } else if (pcb->ipsec_kpipe_txring == ring) {
534 pcb->ipsec_kpipe_txring = NULL;
535 }
536}
537
538static errno_t
539ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
540 kern_channel_ring_t tx_ring, uint32_t flags)
541{
542#pragma unused(nxprov)
543#pragma unused(flags)
544 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
545
546 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
547 int channel_enabled = pcb->ipsec_kpipe_enabled;
548 if (!channel_enabled) {
549 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
550 return 0;
551 }
552
553 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
554 if (tx_slot == NULL) {
555 // Nothing to write, bail
556 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
557 return 0;
558 }
559
560 // Signal the netif ring to read
561 kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring;
562 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
563
564 if (rx_ring != NULL) {
565 kern_channel_notify(rx_ring, 0);
566 }
567 return 0;
568}
569
570static mbuf_t
571ipsec_encrypt_mbuf(ifnet_t interface,
572 mbuf_t data)
573{
574 struct ipsec_output_state ipsec_state;
575 int error = 0;
576 uint32_t af;
577
578 // Make sure this packet isn't looping through the interface
579 if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
580 error = -1;
581 goto ipsec_output_err;
582 }
583
584 // Mark the interface so NECP can evaluate tunnel policy
585 necp_mark_packet_from_interface(data, interface);
586
587 struct ip *ip = mtod(data, struct ip *);
588 u_int ip_version = ip->ip_v;
589
590 switch (ip_version) {
591 case 4: {
592 af = AF_INET;
593
594 memset(&ipsec_state, 0, sizeof(ipsec_state));
595 ipsec_state.m = data;
596 ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
597 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
598
599 error = ipsec4_interface_output(&ipsec_state, interface);
600 if (error == 0 && ipsec_state.tunneled == 6) {
601 // Tunneled in IPv6 - packet is gone
602 // TODO: Don't lose mbuf
603 data = NULL;
604 goto done;
605 }
606
607 data = ipsec_state.m;
608 if (error || data == NULL) {
609 if (error) {
610 printf("ipsec_encrypt_mbuf: ipsec4_interface_output error %d\n", error);
611 }
612 goto ipsec_output_err;
613 }
614 goto done;
615 }
616 case 6: {
617 af = AF_INET6;
618
619 data = ipsec6_splithdr(data);
620 if (data == NULL) {
621 printf("ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
622 goto ipsec_output_err;
623 }
624
625 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
626
627 memset(&ipsec_state, 0, sizeof(ipsec_state));
628 ipsec_state.m = data;
629 ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
630 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
631
632 error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
633 if (error == 0 && ipsec_state.tunneled == 4) {
634 // Tunneled in IPv4 - packet is gone
635 // TODO: Don't lose mbuf
636 data = NULL;
637 goto done;
638 }
639 data = ipsec_state.m;
640 if (error || data == NULL) {
641 if (error) {
642 printf("ipsec_encrypt_mbuf: ipsec6_interface_output error %d\n", error);
643 }
644 goto ipsec_output_err;
645 }
646 goto done;
647 }
648 default: {
649 printf("ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
650 error = -1;
651 goto ipsec_output_err;
652 }
653 }
654
655done:
656 return data;
657
658ipsec_output_err:
659 if (data) {
660 mbuf_freem(data);
661 }
662 return NULL;
663}
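/*
 * Illustrative sketch (hypothetical helper, not in the original source): the
 * version-nibble dispatch used above, in ipsec_netif_check_policy() and in the
 * sync_rx paths, could be factored as below. It assumes the first byte of the
 * IP header is in the first mbuf, as the callers already require.
 *
 *	static inline uint32_t
 *	ipsec_af_from_packet(mbuf_t data)
 *	{
 *		struct ip *ip = mtod(data, struct ip *);
 *		switch (ip->ip_v) {
 *		case 4:
 *			return AF_INET;
 *		case 6:
 *			return AF_INET6;
 *		default:
 *			return 0;	// unknown version, caller drops the packet
 *		}
 *	}
 */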
664
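/*
 * Kernel-pipe RX sync: drains the netif TX ring, copies each slot into an
 * mbuf, runs it through ipsec_encrypt_mbuf(), and places the resulting
 * ESP-encapsulated packet into the kernel-pipe RX ring for the channel
 * client to read. Output is always re-enabled on the ifnet afterwards.
 */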
665static errno_t
666ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
667 kern_channel_ring_t rx_ring, uint32_t flags)
668{
669#pragma unused(nxprov)
670#pragma unused(flags)
671 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
672 struct kern_channel_ring_stat_increment rx_ring_stats;
673
674 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
675
676 int channel_enabled = pcb->ipsec_kpipe_enabled;
677 if (!channel_enabled) {
678 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
679 return 0;
680 }
681
682 // Reclaim user-released slots
683 (void) kern_channel_reclaim(rx_ring);
684
685 uint32_t avail = kern_channel_available_slot_count(rx_ring);
686 if (avail == 0) {
687 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
688 return 0;
689 }
690
691 kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring;
692 if (tx_ring == NULL) {
693 // Net-If TX ring not set up yet, nothing to read
694 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
695 return 0;
696 }
697
698 struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;
699
700 // Unlock ipsec before entering ring
701 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
702
703 (void)kr_enter(tx_ring, TRUE);
704
705 // Lock again after entering and validate
706 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
707 if (tx_ring != pcb->ipsec_netif_txring) {
708 // Ring no longer valid
709 // Unlock first, then exit ring
710 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
711 kr_exit(tx_ring);
712 return 0;
713 }
714
715
716 struct kern_channel_ring_stat_increment tx_ring_stats;
717 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
718 kern_channel_slot_t tx_pslot = NULL;
719 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
720 if (tx_slot == NULL) {
721 // Nothing to read, don't bother signalling
722 // Unlock first, then exit ring
723 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
724 kr_exit(tx_ring);
725 return 0;
726 }
727
728 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
729 VERIFY(rx_pp != NULL);
730 bzero(&rx_ring_stats, sizeof(rx_ring_stats));
731 kern_channel_slot_t rx_pslot = NULL;
732 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
733
734 while (rx_slot != NULL && tx_slot != NULL) {
735 size_t length = 0;
736 mbuf_t data = NULL;
737 errno_t error = 0;
738
739 // Allocate rx packet
740 kern_packet_t rx_ph = 0;
741 error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
742 if (__improbable(error != 0)) {
743 printf("ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
744 pcb->ipsec_ifp->if_xname);
745 break;
746 }
747
748 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
749
750 // Advance TX ring
751 tx_pslot = tx_slot;
752 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
753
754 if (tx_ph == 0) {
755 kern_pbufpool_free(rx_pp, rx_ph);
756 continue;
757 }
758
759 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
760 VERIFY(tx_buf != NULL);
761 uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
762 VERIFY(tx_baddr != NULL);
763 tx_baddr += kern_buflet_get_data_offset(tx_buf);
764
765 bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
766
767 length = MIN(kern_packet_get_data_length(tx_ph),
768 pcb->ipsec_slot_size);
769
770 // Increment TX stats
771 tx_ring_stats.kcrsi_slots_transferred++;
772 tx_ring_stats.kcrsi_bytes_transferred += length;
773
774 if (length > 0) {
775 error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
776 if (error == 0) {
777 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
778 if (error == 0) {
779 // Encrypt and send packet
780 data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
781 } else {
782 printf("ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
783 STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
784 STATS_INC(nifs, NETIF_STATS_DROPPED);
785 mbuf_freem(data);
786 data = NULL;
787 }
788 } else {
789 printf("ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
790 STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
791 STATS_INC(nifs, NETIF_STATS_DROPPED);
792 }
793 } else {
794 printf("ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
795 STATS_INC(nifs, NETIF_STATS_BADLEN);
796 STATS_INC(nifs, NETIF_STATS_DROPPED);
797 }
798
799 if (data == NULL) {
800 printf("ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
801 kern_pbufpool_free(rx_pp, rx_ph);
802 break;
803 }
804
805 length = mbuf_pkthdr_len(data);
806 if (length > rx_pp->pp_buflet_size) {
807 // Flush data
808 mbuf_freem(data);
809 kern_pbufpool_free(rx_pp, rx_ph);
810 printf("ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
811 pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
812 continue;
813 }
814
815 // Fillout rx packet
816 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
817 VERIFY(rx_buf != NULL);
818 void *rx_baddr = kern_buflet_get_object_address(rx_buf);
819 VERIFY(rx_baddr != NULL);
820
821 // Copy-in data from mbuf to buflet
822 mbuf_copydata(data, 0, length, (void *)rx_baddr);
823 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
824
825 // Finalize and attach the packet
826 error = kern_buflet_set_data_offset(rx_buf, 0);
827 VERIFY(error == 0);
828 error = kern_buflet_set_data_length(rx_buf, length);
829 VERIFY(error == 0);
830 error = kern_packet_finalize(rx_ph);
831 VERIFY(error == 0);
832 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
833 VERIFY(error == 0);
834
835 STATS_INC(nifs, NETIF_STATS_TXPKTS);
836 STATS_INC(nifs, NETIF_STATS_TXCOPY_DIRECT);
837
838 rx_ring_stats.kcrsi_slots_transferred++;
839 rx_ring_stats.kcrsi_bytes_transferred += length;
840
841 if (!pcb->ipsec_ext_ifdata_stats) {
842 ifnet_stat_increment_out(pcb->ipsec_ifp, 1, length, 0);
843 }
844
845 mbuf_freem(data);
846
847 rx_pslot = rx_slot;
848 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
849 }
850
851 if (rx_pslot) {
852 kern_channel_advance_slot(rx_ring, rx_pslot);
853 kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
854 }
855
856 if (tx_pslot) {
857 kern_channel_advance_slot(tx_ring, tx_pslot);
858 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
859 (void)kern_channel_reclaim(tx_ring);
860 }
861
862 /* always reenable output */
863 errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
864 if (error != 0) {
865 printf("ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
866 }
867
868 // Unlock first, then exit ring
869 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
870
871 if (tx_pslot != NULL) {
872 kern_channel_notify(tx_ring, 0);
873 }
874 kr_exit(tx_ring);
875
876 return 0;
877}
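/*
 * Illustrative sketch (hypothetical helper, not in the original source): the
 * "allocate an mbuf and copy a slot's payload into it" step above repeats in
 * ipsec_netif_sync_tx() and ipsec_netif_sync_rx(); a shared helper could look
 * like this, with the caller still responsible for the stats accounting.
 *
 *	static mbuf_t
 *	ipsec_slot_data_to_mbuf(const uint8_t *baddr, size_t length)
 *	{
 *		mbuf_t data = NULL;
 *		if (mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data) != 0) {
 *			return NULL;
 *		}
 *		if (mbuf_copyback(data, 0, length, baddr, MBUF_DONTWAIT) != 0) {
 *			mbuf_freem(data);
 *			return NULL;
 *		}
 *		return data;
 *	}
 */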
878
879static errno_t
880ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
881 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
882 void **ring_ctx)
883{
884#pragma unused(nxprov)
885#pragma unused(channel)
886#pragma unused(ring_ctx)
887 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
888 if (!is_tx_ring) {
889 VERIFY(pcb->ipsec_netif_rxring == NULL);
890 pcb->ipsec_netif_rxring = ring;
891 } else {
892 VERIFY(pcb->ipsec_netif_txring == NULL);
893 pcb->ipsec_netif_txring = ring;
894 }
895 return 0;
896}
897
898static void
899ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
900 kern_channel_ring_t ring)
901{
902#pragma unused(nxprov)
903 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
904 if (pcb->ipsec_netif_rxring == ring) {
905 pcb->ipsec_netif_rxring = NULL;
906 } else if (pcb->ipsec_netif_txring == ring) {
907 pcb->ipsec_netif_txring = NULL;
908 }
909}
910
911static bool
912ipsec_netif_check_policy(mbuf_t data)
913{
914 necp_kernel_policy_result necp_result = 0;
915 necp_kernel_policy_result_parameter necp_result_parameter = {};
916 uint32_t necp_matched_policy_id = 0;
917
918 // This packet has been marked with IP level policy, do not mark again.
919 if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
920 return (true);
921 }
922
923 size_t length = mbuf_pkthdr_len(data);
924 if (length < sizeof(struct ip)) {
925 return (false);
926 }
927
928 struct ip *ip = mtod(data, struct ip *);
929 u_int ip_version = ip->ip_v;
930 switch (ip_version) {
931 case 4: {
932 necp_matched_policy_id = necp_ip_output_find_policy_match(data, 0, NULL,
933 &necp_result, &necp_result_parameter);
934 break;
935 }
936 case 6: {
937 necp_matched_policy_id = necp_ip6_output_find_policy_match(data, 0, NULL,
938 &necp_result, &necp_result_parameter);
939 break;
940 }
941 default: {
942 return (false);
943 }
944 }
945
946 if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
947 necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
948 /* Drop and flow divert packets should be blocked at the IP layer */
949 return (false);
950 }
951
952 necp_mark_packet_from_ip(data, necp_matched_policy_id);
953 return (true);
954}
955
956static errno_t
957ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
958 kern_channel_ring_t tx_ring, uint32_t flags)
959{
960#pragma unused(nxprov)
961#pragma unused(flags)
962 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
963
964 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
965
966 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
967
968 struct kern_channel_ring_stat_increment tx_ring_stats;
969 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
970 kern_channel_slot_t tx_pslot = NULL;
971 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
972
973 STATS_INC(nifs, NETIF_STATS_TXSYNC);
974
975 if (tx_slot == NULL) {
976 // Nothing to write, don't bother signalling
977 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
978 return 0;
979 }
980
981 if (pcb->ipsec_kpipe_enabled) {
982 kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring;
983 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
984
985 // Signal the kernel pipe ring to read
986 if (rx_ring != NULL) {
987 kern_channel_notify(rx_ring, 0);
988 }
989 return 0;
990 }
991
992 // If we're here, we're injecting into the BSD stack
993 while (tx_slot != NULL) {
994 size_t length = 0;
995 mbuf_t data = NULL;
996
997 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
998
999 // Advance TX ring
1000 tx_pslot = tx_slot;
1001 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1002
1003 if (tx_ph == 0) {
1004 continue;
1005 }
1006
1007 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1008 VERIFY(tx_buf != NULL);
1009 uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
1010 VERIFY(tx_baddr != 0);
1011 tx_baddr += kern_buflet_get_data_offset(tx_buf);
1012
1013 bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
1014
1015 length = MIN(kern_packet_get_data_length(tx_ph),
1016 pcb->ipsec_slot_size);
1017
1018 if (length > 0) {
1019 errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
1020 if (error == 0) {
1021 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
1022 if (error == 0) {
1023 // Mark packet from policy
1024 uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
1025 necp_mark_packet_from_ip(data, policy_id);
1026
1027 // Check policy with NECP
1028 if (!ipsec_netif_check_policy(data)) {
1029 printf("ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
1030 STATS_INC(nifs, NETIF_STATS_DROPPED);
1031 mbuf_freem(data);
1032 data = NULL;
1033 } else {
1034 // Send through encryption
1035 error = ipsec_output(pcb->ipsec_ifp, data);
1036 if (error != 0) {
1037 printf("ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
1038 }
1039 }
1040 } else {
1041 printf("ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
1042 STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
1043 STATS_INC(nifs, NETIF_STATS_DROPPED);
1044 mbuf_freem(data);
1045 data = NULL;
1046 }
1047 } else {
1048 printf("ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
1049 STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
1050 STATS_INC(nifs, NETIF_STATS_DROPPED);
1051 }
1052 } else {
1053 printf("ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
1054 STATS_INC(nifs, NETIF_STATS_BADLEN);
1055 STATS_INC(nifs, NETIF_STATS_DROPPED);
1056 }
1057
1058 if (data == NULL) {
1059 printf("ipsec_netif_sync_tx %s: no packet to send\n", pcb->ipsec_ifp->if_xname);
1060 break;
1061 }
1062
1063 STATS_INC(nifs, NETIF_STATS_TXPKTS);
1064 STATS_INC(nifs, NETIF_STATS_TXCOPY_MBUF);
1065
1066 tx_ring_stats.kcrsi_slots_transferred++;
1067 tx_ring_stats.kcrsi_bytes_transferred += length;
1068 }
1069
1070 if (tx_pslot) {
1071 kern_channel_advance_slot(tx_ring, tx_pslot);
1072 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1073 (void)kern_channel_reclaim(tx_ring);
1074 }
1075
1076 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1077
1078 return 0;
1079}
1080
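/*
 * TX doorbell for the netif nexus: refill and sync the TX ring, apply
 * backpressure by disabling ifnet output while the ring is (nearly) full in
 * kernel-pipe mode, then notify the kernel-pipe RX ring so it drains the
 * newly queued slots.
 */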
1081static errno_t
1082ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1083 kern_channel_ring_t ring, __unused uint32_t flags)
1084{
1085#pragma unused(nxprov)
1086 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1087 boolean_t more = false;
1088 errno_t rc = 0;
1089
1090 /*
1091 * Refill and sync the ring; we may be racing against another thread doing
1092 * an RX sync that also wants to do kr_enter(), and so use the blocking
1093 * variant here.
1094 */
1095 rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
1096 if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
1097 printf("%s, tx refill failed %d\n", __func__, rc);
1098 }
1099
1100 (void) kr_enter(ring, TRUE);
1101 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1102
1103 if (pcb->ipsec_kpipe_enabled) {
1104 uint32_t tx_available = kern_channel_available_slot_count(ring);
1105 if (pcb->ipsec_netif_txring_size > 0 &&
1106 tx_available >= pcb->ipsec_netif_txring_size - 1) {
1107 // No room left in tx ring, disable output for now
1108 errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
1109 if (error != 0) {
1110 printf("ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
1111 }
1112 }
1113 }
1114
1115 if (pcb->ipsec_kpipe_enabled) {
1116 kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring;
1117
1118 // Unlock while calling notify
1119 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1120 // Signal the kernel pipe ring to read
1121 if (rx_ring != NULL) {
1122 kern_channel_notify(rx_ring, 0);
1123 }
1124 } else {
1125 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1126 }
1127
1128 kr_exit(ring);
1129
1130 return (0);
1131}
1132
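/*
 * Netif RX sync: fills the netif RX ring from two sources. First, mbufs queued
 * on ipsec_input_chain (the legacy input path) are copied into RX slots,
 * fragmenting them first if they exceed the buflet or configured fragment
 * size. Second, if a kernel pipe is attached, packets are pulled from the
 * kernel-pipe TX ring, decrypted via esp4_input_extended() /
 * esp6_input_extended(), and copied into the remaining RX slots.
 */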
1133static errno_t
1134ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1135 kern_channel_ring_t rx_ring, uint32_t flags)
1136{
1137#pragma unused(nxprov)
1138#pragma unused(flags)
1139 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1140 struct kern_channel_ring_stat_increment rx_ring_stats;
1141
1142 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1143
1144 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1145
1146 // Reclaim user-released slots
1147 (void) kern_channel_reclaim(rx_ring);
1148
1149 STATS_INC(nifs, NETIF_STATS_RXSYNC);
1150
1151 uint32_t avail = kern_channel_available_slot_count(rx_ring);
1152 if (avail == 0) {
1153 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1154 return 0;
1155 }
1156
1157 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
1158 VERIFY(rx_pp != NULL);
1159 bzero(&rx_ring_stats, sizeof(rx_ring_stats));
1160 kern_channel_slot_t rx_pslot = NULL;
1161 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1162
1163 while (rx_slot != NULL) {
1164 // Check for a waiting packet
1165 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1166 mbuf_t data = pcb->ipsec_input_chain;
1167 if (data == NULL) {
1168 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1169 break;
1170 }
1171
1172 // Allocate rx packet
1173 kern_packet_t rx_ph = 0;
1174 errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1175 if (__improbable(error != 0)) {
1176 STATS_INC(nifs, NETIF_STATS_NOMEM_PKT);
1177 STATS_INC(nifs, NETIF_STATS_DROPPED);
1178 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1179 break;
1180 }
1181
1182 // Advance waiting packets
1183 pcb->ipsec_input_chain = data->m_nextpkt;
1184 data->m_nextpkt = NULL;
1185 if (pcb->ipsec_input_chain == NULL) {
1186 pcb->ipsec_input_chain_last = NULL;
1187 }
1188 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1189
1190 size_t length = mbuf_pkthdr_len(data);
1191
1192 if (length < sizeof(struct ip)) {
1193 // Flush data
1194 mbuf_freem(data);
1195 kern_pbufpool_free(rx_pp, rx_ph);
1196 STATS_INC(nifs, NETIF_STATS_BADLEN);
1197 STATS_INC(nifs, NETIF_STATS_DROPPED);
1198 printf("ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
1199 pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
1200 continue;
1201 }
1202
1203 uint32_t af = 0;
1204 struct ip *ip = mtod(data, struct ip *);
1205 u_int ip_version = ip->ip_v;
1206 switch (ip_version) {
1207 case 4: {
1208 af = AF_INET;
1209 break;
1210 }
1211 case 6: {
1212 af = AF_INET6;
1213 break;
1214 }
1215 default: {
1216 printf("ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
1217 pcb->ipsec_ifp->if_xname, ip_version);
1218 break;
1219 }
1220 }
1221
1222 if (length > rx_pp->pp_buflet_size ||
1223 (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
1224
1225 // We need to fragment to send up into the netif
1226
1227 u_int32_t fragment_mtu = rx_pp->pp_buflet_size;
1228 if (pcb->ipsec_frag_size_set &&
1229 pcb->ipsec_input_frag_size < rx_pp->pp_buflet_size) {
1230 fragment_mtu = pcb->ipsec_input_frag_size;
1231 }
1232
1233 mbuf_t fragment_chain = NULL;
1234 switch (af) {
1235 case AF_INET: {
1236 // ip_fragment expects the length in host order
1237 ip->ip_len = ntohs(ip->ip_len);
1238
1239 // ip_fragment will modify the original data, don't free
1240 int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
1241 if (fragment_error == 0 && data != NULL) {
1242 fragment_chain = data;
1243 } else {
1244 STATS_INC(nifs, NETIF_STATS_BADLEN);
1245 STATS_INC(nifs, NETIF_STATS_DROPPED);
1246 printf("ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
1247 pcb->ipsec_ifp->if_xname, length, fragment_error);
1248 }
1249 break;
1250 }
1251 case AF_INET6: {
1252 if (length < sizeof(struct ip6_hdr)) {
1253 mbuf_freem(data);
1254 STATS_INC(nifs, NETIF_STATS_BADLEN);
1255 STATS_INC(nifs, NETIF_STATS_DROPPED);
1256 printf("ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
1257 pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
1258 } else {
1259
1260 // ip6_do_fragmentation will free the original data on success only
1261 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1262 struct ip6_exthdrs exthdrs;
1263 memset(&exthdrs, 0, sizeof(exthdrs));
1264
1265 int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
1266 ip6, &exthdrs, fragment_mtu, ip6->ip6_nxt);
1267 if (fragment_error == 0 && data != NULL) {
1268 fragment_chain = data;
1269 } else {
1270 mbuf_freem(data);
1271 STATS_INC(nifs, NETIF_STATS_BADLEN);
1272 STATS_INC(nifs, NETIF_STATS_DROPPED);
1273 printf("ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
1274 pcb->ipsec_ifp->if_xname, length, fragment_error);
1275 }
1276 }
1277 break;
1278 }
1279 default: {
1280 // Cannot fragment unknown families
1281 mbuf_freem(data);
1282 STATS_INC(nifs, NETIF_STATS_BADLEN);
1283 STATS_INC(nifs, NETIF_STATS_DROPPED);
1284 printf("ipsec_netif_sync_rx %s: unknown legacy decrypted packet length %zu > %u\n",
1285 pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
1286 break;
1287 }
1288 }
1289
1290 if (fragment_chain != NULL) {
1291 // Add fragments to chain before continuing
1292 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1293 if (pcb->ipsec_input_chain != NULL) {
1294 pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
1295 } else {
1296 pcb->ipsec_input_chain = fragment_chain;
1297 }
1298 while (fragment_chain->m_nextpkt) {
1299 VERIFY(fragment_chain != fragment_chain->m_nextpkt);
1300 fragment_chain = fragment_chain->m_nextpkt;
1301 }
1302 pcb->ipsec_input_chain_last = fragment_chain;
1303 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1304 }
1305
1306 // Make sure to free unused rx packet
1307 kern_pbufpool_free(rx_pp, rx_ph);
1308
1309 continue;
1310 }
1311
1312 mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1313
1314 // Fillout rx packet
1315 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1316 VERIFY(rx_buf != NULL);
1317 void *rx_baddr = kern_buflet_get_object_address(rx_buf);
1318 VERIFY(rx_baddr != NULL);
1319
1320 // Copy-in data from mbuf to buflet
1321 mbuf_copydata(data, 0, length, (void *)rx_baddr);
1322 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
1323
1324 // Finalize and attach the packet
1325 error = kern_buflet_set_data_offset(rx_buf, 0);
1326 VERIFY(error == 0);
1327 error = kern_buflet_set_data_length(rx_buf, length);
1328 VERIFY(error == 0);
1329 error = kern_packet_set_link_header_offset(rx_ph, 0);
1330 VERIFY(error == 0);
1331 error = kern_packet_set_network_header_offset(rx_ph, 0);
1332 VERIFY(error == 0);
1333 error = kern_packet_finalize(rx_ph);
1334 VERIFY(error == 0);
1335 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1336 VERIFY(error == 0);
1337
1338 STATS_INC(nifs, NETIF_STATS_RXPKTS);
1339 STATS_INC(nifs, NETIF_STATS_RXCOPY_MBUF);
1340 bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1341
1342 rx_ring_stats.kcrsi_slots_transferred++;
1343 rx_ring_stats.kcrsi_bytes_transferred += length;
1344
1345 if (!pcb->ipsec_ext_ifdata_stats) {
1346 ifnet_stat_increment_in(pcb->ipsec_ifp, 1, length, 0);
1347 }
1348
1349 mbuf_freem(data);
1350
1351 // Advance ring
1352 rx_pslot = rx_slot;
1353 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1354 }
1355
1356 struct kern_channel_ring_stat_increment tx_ring_stats;
1357 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
1358 kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring;
1359 kern_channel_slot_t tx_pslot = NULL;
1360 kern_channel_slot_t tx_slot = NULL;
1361 if (tx_ring == NULL) {
1362 // Kernel-pipe TX ring not set up yet, nothing to read
1363 goto done;
1364 }
1365
1366
1367 // Unlock ipsec before entering ring
1368 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1369
1370 (void)kr_enter(tx_ring, TRUE);
1371
1372 // Lock again after entering and validate
1373 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1374
1375 if (tx_ring != pcb->ipsec_kpipe_txring) {
1376 goto done;
1377 }
1378
1379 tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
1380 if (tx_slot == NULL) {
1381 // Nothing to read, don't bother signalling
1382 goto done;
1383 }
1384
1385 while (rx_slot != NULL && tx_slot != NULL) {
1386 size_t length = 0;
1387 mbuf_t data = NULL;
1388 errno_t error = 0;
1389 uint32_t af;
1390
1391 // Allocate rx packet
1392 kern_packet_t rx_ph = 0;
1393 error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1394 if (__improbable(error != 0)) {
1395 STATS_INC(nifs, NETIF_STATS_NOMEM_PKT);
1396 STATS_INC(nifs, NETIF_STATS_DROPPED);
1397 break;
1398 }
1399
1400 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
1401
1402 // Advance TX ring
1403 tx_pslot = tx_slot;
1404 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1405
1406 if (tx_ph == 0) {
1407 kern_pbufpool_free(rx_pp, rx_ph);
1408 continue;
1409 }
1410
1411 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1412 VERIFY(tx_buf != NULL);
1413 uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
1414 VERIFY(tx_baddr != 0);
1415 tx_baddr += kern_buflet_get_data_offset(tx_buf);
1416
1417 length = MIN(kern_packet_get_data_length(tx_ph),
1418 pcb->ipsec_slot_size);
1419
1420 // Increment TX stats
1421 tx_ring_stats.kcrsi_slots_transferred++;
1422 tx_ring_stats.kcrsi_bytes_transferred += length;
1423
1424 if (length >= sizeof(struct ip)) {
1425 error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
1426 if (error == 0) {
1427 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
1428 if (error == 0) {
1429 struct ip *ip = mtod(data, struct ip *);
1430 u_int ip_version = ip->ip_v;
1431 switch (ip_version) {
1432 case 4: {
1433 af = AF_INET;
1434 ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip);
1435 ip->ip_off = ntohs(ip->ip_off);
1436
1437 if (length < ip->ip_len) {
1438 printf("ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
1439 pcb->ipsec_ifp->if_xname, length, ip->ip_len);
1440 STATS_INC(nifs, NETIF_STATS_BADLEN);
1441 STATS_INC(nifs, NETIF_STATS_DROPPED);
1442 mbuf_freem(data);
1443 data = NULL;
1444 } else {
1445 data = esp4_input_extended(data, sizeof(struct ip), pcb->ipsec_ifp);
1446 }
1447 break;
1448 }
1449 case 6: {
1450 if (length < sizeof(struct ip6_hdr)) {
1451 printf("ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
1452 pcb->ipsec_ifp->if_xname, length);
1453 STATS_INC(nifs, NETIF_STATS_BADLEN);
1454 STATS_INC(nifs, NETIF_STATS_DROPPED);
1455 mbuf_freem(data);
1456 data = NULL;
1457 } else {
1458 af = AF_INET6;
1459 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1460 const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
1461 if (length < ip6_len) {
1462 printf("ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
1463 pcb->ipsec_ifp->if_xname, length, ip6_len);
1464 STATS_INC(nifs, NETIF_STATS_BADLEN);
1465 STATS_INC(nifs, NETIF_STATS_DROPPED);
1466 mbuf_freem(data);
1467 data = NULL;
1468 } else {
1469 int offset = sizeof(struct ip6_hdr);
1470 esp6_input_extended(&data, &offset, ip6->ip6_nxt, pcb->ipsec_ifp);
1471 }
1472 }
1473 break;
1474 }
1475 default: {
1476 printf("ipsec_netif_sync_rx %s: unknown ip version %u\n",
1477 pcb->ipsec_ifp->if_xname, ip_version);
1478 STATS_INC(nifs, NETIF_STATS_DROPPED);
1479 mbuf_freem(data);
1480 data = NULL;
1481 break;
1482 }
1483 }
1484 } else {
1485 printf("ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
1486 STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
1487 STATS_INC(nifs, NETIF_STATS_DROPPED);
1488 mbuf_freem(data);
1489 data = NULL;
1490 }
1491 } else {
1492 printf("ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
1493 STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
1494 STATS_INC(nifs, NETIF_STATS_DROPPED);
1495 }
1496 } else {
1497 printf("ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb->ipsec_ifp->if_xname, length);
1498 STATS_INC(nifs, NETIF_STATS_BADLEN);
1499 STATS_INC(nifs, NETIF_STATS_DROPPED);
1500 }
1501
1502 if (data == NULL) {
1503 // Failed to get decrypted data
1504 kern_pbufpool_free(rx_pp, rx_ph);
1505 continue;
1506 }
1507
1508 length = mbuf_pkthdr_len(data);
1509 if (length > rx_pp->pp_buflet_size) {
1510 // Flush data
1511 mbuf_freem(data);
1512 kern_pbufpool_free(rx_pp, rx_ph);
1513 STATS_INC(nifs, NETIF_STATS_BADLEN);
1514 STATS_INC(nifs, NETIF_STATS_DROPPED);
1515 printf("ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
1516 pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
1517 continue;
1518 }
1519
1520 mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1521
1522 // Fillout rx packet
1523 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1524 VERIFY(rx_buf != NULL);
1525 void *rx_baddr = kern_buflet_get_object_address(rx_buf);
1526 VERIFY(rx_baddr != NULL);
1527
1528 // Copy-in data from mbuf to buflet
1529 mbuf_copydata(data, 0, length, (void *)rx_baddr);
1530 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
1531
1532 // Finalize and attach the packet
1533 error = kern_buflet_set_data_offset(rx_buf, 0);
1534 VERIFY(error == 0);
1535 error = kern_buflet_set_data_length(rx_buf, length);
1536 VERIFY(error == 0);
1537 error = kern_packet_set_link_header_offset(rx_ph, 0);
1538 VERIFY(error == 0);
1539 error = kern_packet_set_network_header_offset(rx_ph, 0);
1540 VERIFY(error == 0);
1541 error = kern_packet_finalize(rx_ph);
1542 VERIFY(error == 0);
1543 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1544 VERIFY(error == 0);
1545
1546 STATS_INC(nifs, NETIF_STATS_RXPKTS);
1547 STATS_INC(nifs, NETIF_STATS_RXCOPY_DIRECT);
1548 bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1549
1550 rx_ring_stats.kcrsi_slots_transferred++;
1551 rx_ring_stats.kcrsi_bytes_transferred += length;
1552
1553 if (!pcb->ipsec_ext_ifdata_stats) {
1554 ifnet_stat_increment_in(pcb->ipsec_ifp, 1, length, 0);
1555 }
1556
1557 mbuf_freem(data);
1558
1559 rx_pslot = rx_slot;
1560 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1561 }
1562
1563done:
1564 if (rx_pslot) {
1565 kern_channel_advance_slot(rx_ring, rx_pslot);
1566 kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
1567 }
1568
1569 if (tx_pslot) {
1570 kern_channel_advance_slot(tx_ring, tx_pslot);
1571 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1572 (void)kern_channel_reclaim(tx_ring);
1573 }
1574
1575 // Unlock first, then exit ring
1576 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1577 if (tx_ring != NULL) {
1578 if (tx_pslot != NULL) {
1579 kern_channel_notify(tx_ring, 0);
1580 }
1581 kr_exit(tx_ring);
1582 }
1583
1584 return 0;
1585}
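/*
 * Illustrative sketch (hypothetical helper, not in the original source): the
 * "copy an mbuf into an RX slot and finalize it" sequence appears twice above
 * and once in ipsec_kpipe_sync_rx(); factored out it might look as follows,
 * with the caller keeping ownership of (and freeing) the mbuf. Error checks
 * (VERIFY) on the individual calls are omitted here for brevity.
 *
 *	static errno_t
 *	ipsec_mbuf_to_rx_slot(kern_channel_ring_t rx_ring, kern_channel_slot_t rx_slot,
 *	    kern_packet_t rx_ph, mbuf_t data, size_t length)
 *	{
 *		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
 *		void *rx_baddr = kern_buflet_get_object_address(rx_buf);
 *
 *		mbuf_copydata(data, 0, length, rx_baddr);
 *		kern_packet_clear_flow_uuid(rx_ph);	// Zero flow id
 *
 *		kern_buflet_set_data_offset(rx_buf, 0);
 *		kern_buflet_set_data_length(rx_buf, length);
 *		kern_packet_set_link_header_offset(rx_ph, 0);
 *		kern_packet_set_network_header_offset(rx_ph, 0);
 *		kern_packet_finalize(rx_ph);
 *		return kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
 *	}
 */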
1586
1587static errno_t
1588ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
1589 struct ifnet_init_eparams *init_params,
1590 struct ifnet **ifp)
1591{
1592 errno_t err;
1593 nexus_controller_t controller = kern_nexus_shared_controller();
1594 struct kern_nexus_net_init net_init;
1595 struct kern_pbufpool_init pp_init;
1596
1597 nexus_name_t provider_name;
1598 snprintf((char *)provider_name, sizeof(provider_name),
1599 "com.apple.netif.%s", pcb->ipsec_if_xname);
1600
1601 struct kern_nexus_provider_init prov_init = {
1602 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1603 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1604 .nxpi_pre_connect = ipsec_nexus_pre_connect,
1605 .nxpi_connected = ipsec_nexus_connected,
1606 .nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
1607 .nxpi_disconnected = ipsec_nexus_disconnected,
1608 .nxpi_ring_init = ipsec_netif_ring_init,
1609 .nxpi_ring_fini = ipsec_netif_ring_fini,
1610 .nxpi_slot_init = NULL,
1611 .nxpi_slot_fini = NULL,
1612 .nxpi_sync_tx = ipsec_netif_sync_tx,
1613 .nxpi_sync_rx = ipsec_netif_sync_rx,
1614 .nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
1615 };
1616
1617 nexus_attr_t nxa = NULL;
1618 err = kern_nexus_attr_create(&nxa);
1619 IPSEC_IF_VERIFY(err == 0);
1620 if (err != 0) {
1621 printf("%s: kern_nexus_attr_create failed: %d\n",
1622 __func__, err);
1623 goto failed;
1624 }
1625
1626 uint64_t slot_buffer_size = pcb->ipsec_slot_size;
1627 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1628 VERIFY(err == 0);
1629
1630 // Reset ring size for netif nexus to limit memory usage
1631 uint64_t ring_size = pcb->ipsec_netif_ring_size;
1632 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1633 VERIFY(err == 0);
1634 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1635 VERIFY(err == 0);
1636
1637 pcb->ipsec_netif_txring_size = ring_size;
1638
1639 bzero(&pp_init, sizeof (pp_init));
1640 pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1641 pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2;
1642 pp_init.kbi_bufsize = pcb->ipsec_slot_size;
1643 pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
1644 pp_init.kbi_max_frags = 1;
1645 (void) snprintf((char *)pp_init.kbi_name, sizeof (pp_init.kbi_name),
1646 "%s", provider_name);
1647
1648 err = kern_pbufpool_create(&pp_init, &pp_init, &pcb->ipsec_netif_pp, NULL);
1649 if (err != 0) {
1650 printf("%s pbufpool create failed, error %d\n", __func__, err);
1651 goto failed;
1652 }
1653
1654 err = kern_nexus_controller_register_provider(controller,
1655 ipsec_nx_dom_prov,
1656 provider_name,
1657 &prov_init,
1658 sizeof(prov_init),
1659 nxa,
1660 &pcb->ipsec_nx.if_provider);
1661 IPSEC_IF_VERIFY(err == 0);
1662 if (err != 0) {
1663 printf("%s register provider failed, error %d\n",
1664 __func__, err);
1665 goto failed;
1666 }
1667
1668 bzero(&net_init, sizeof(net_init));
1669 net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
1670 net_init.nxneti_flags = 0;
1671 net_init.nxneti_eparams = init_params;
1672 net_init.nxneti_lladdr = NULL;
1673 net_init.nxneti_prepare = ipsec_netif_prepare;
1674 net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
1675 err = kern_nexus_controller_alloc_net_provider_instance(controller,
1676 pcb->ipsec_nx.if_provider,
1677 pcb,
1678 &pcb->ipsec_nx.if_instance,
1679 &net_init,
1680 ifp);
1681 IPSEC_IF_VERIFY(err == 0);
1682 if (err != 0) {
1683 printf("%s alloc_net_provider_instance failed, %d\n",
1684 __func__, err);
1685 kern_nexus_controller_deregister_provider(controller,
1686 pcb->ipsec_nx.if_provider);
1687 uuid_clear(pcb->ipsec_nx.if_provider);
1688 goto failed;
1689 }
1690
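	/*
	 * Success falls through to the shared cleanup below: with err == 0 only
	 * the temporary nexus attribute is destroyed and the pbufpool is kept.
	 */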
1691failed:
1692 if (nxa) {
1693 kern_nexus_attr_destroy(nxa);
1694 }
1695 if (err && pcb->ipsec_netif_pp != NULL) {
1696 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
1697 pcb->ipsec_netif_pp = NULL;
1698 }
1699 return (err);
1700}
1701
1702static void
1703ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
1704{
1705 nexus_controller_t controller = kern_nexus_shared_controller();
1706 errno_t err;
1707
1708 if (!uuid_is_null(instance)) {
1709 err = kern_nexus_controller_free_provider_instance(controller,
1710 instance);
1711 if (err != 0) {
1712 printf("%s free_provider_instance failed %d\n",
1713 __func__, err);
1714 }
1715 uuid_clear(instance);
1716 }
1717 if (!uuid_is_null(provider)) {
1718 err = kern_nexus_controller_deregister_provider(controller,
1719 provider);
1720 if (err != 0) {
1721 printf("%s deregister_provider %d\n", __func__, err);
1722 }
1723 uuid_clear(provider);
1724 }
1725 return;
1726}
1727
1728static void
1729ipsec_nexus_detach(struct ipsec_pcb *pcb)
1730{
1731 ipsec_nx_t nx = &pcb->ipsec_nx;
1732 nexus_controller_t controller = kern_nexus_shared_controller();
1733 errno_t err;
1734
1735 if (!uuid_is_null(nx->ms_host)) {
1736 err = kern_nexus_ifdetach(controller,
1737 nx->ms_instance,
1738 nx->ms_host);
1739 if (err != 0) {
1740 printf("%s: kern_nexus_ifdetach ms host failed %d\n",
1741 __func__, err);
1742 }
1743 }
1744
1745 if (!uuid_is_null(nx->ms_device)) {
1746 err = kern_nexus_ifdetach(controller,
1747 nx->ms_instance,
1748 nx->ms_device);
1749 if (err != 0) {
1750 printf("%s: kern_nexus_ifdetach ms device failed %d\n",
1751 __func__, err);
1752 }
1753 }
1754
1755 ipsec_detach_provider_and_instance(nx->if_provider,
1756 nx->if_instance);
1757 ipsec_detach_provider_and_instance(nx->ms_provider,
1758 nx->ms_instance);
1759
1760 if (pcb->ipsec_netif_pp != NULL) {
1761 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
1762 pcb->ipsec_netif_pp = NULL;
1763
1764 }
1765 memset(nx, 0, sizeof(*nx));
1766}
1767
1768static errno_t
1769ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
1770 uint32_t subtype, const char *type_name,
1771 const char *ifname,
1772 uuid_t *provider, uuid_t *instance)
1773{
1774 nexus_attr_t attr = NULL;
1775 nexus_controller_t controller = kern_nexus_shared_controller();
1776 uuid_t dom_prov;
1777 errno_t err;
1778 struct kern_nexus_init init;
1779 nexus_name_t provider_name;
1780
1781 err = kern_nexus_get_builtin_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
1782 &dom_prov);
1783 IPSEC_IF_VERIFY(err == 0);
1784 if (err != 0) {
1785 printf("%s can't get %s provider, error %d\n",
1786 __func__, type_name, err);
1787 goto failed;
1788 }
1789
1790 err = kern_nexus_attr_create(&attr);
1791 IPSEC_IF_VERIFY(err == 0);
1792 if (err != 0) {
1793 printf("%s: kern_nexus_attr_create failed: %d\n",
1794 __func__, err);
1795 goto failed;
1796 }
1797
1798 err = kern_nexus_attr_set(attr, NEXUS_ATTR_EXTENSIONS, subtype);
1799 VERIFY(err == 0);
1800
1801 uint64_t slot_buffer_size = pcb->ipsec_slot_size;
1802 err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1803 VERIFY(err == 0);
1804
1805 // Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
1806 uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
1807 err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
1808 VERIFY(err == 0);
1809 uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
1810 err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
1811 VERIFY(err == 0);
1812
1813 snprintf((char *)provider_name, sizeof(provider_name),
1814 "com.apple.%s.%s", type_name, ifname);
1815 err = kern_nexus_controller_register_provider(controller,
1816 dom_prov,
1817 provider_name,
1818 NULL,
1819 0,
1820 attr,
1821 provider);
1822 kern_nexus_attr_destroy(attr);
1823 attr = NULL;
1824 IPSEC_IF_VERIFY(err == 0);
1825 if (err != 0) {
1826 printf("%s register %s provider failed, error %d\n",
1827 __func__, type_name, err);
1828 goto failed;
1829 }
1830 bzero(&init, sizeof (init));
1831 init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
1832 err = kern_nexus_controller_alloc_provider_instance(controller,
1833 *provider,
1834 NULL,
1835 instance, &init);
1836 IPSEC_IF_VERIFY(err == 0);
1837 if (err != 0) {
1838 printf("%s alloc_provider_instance %s failed, %d\n",
1839 __func__, type_name, err);
1840 kern_nexus_controller_deregister_provider(controller,
1841 *provider);
1842 uuid_clear(*provider);
1843 }
1844failed:
1845 return (err);
1846}
1847
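/*
 * Attach sequence (as implemented below): once the netif nexus instance
 * exists, create a multistack flowswitch provider/instance, attach the
 * flowswitch to the netif's device and host ports, then record the
 * flowswitch netagent UUID so IPSEC_OPT_ENABLE_FLOWSWITCH can later add or
 * remove that agent on the interface. Any failure tears the nexus state
 * down and detaches the ifnet.
 */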
1848static errno_t
1849ipsec_multistack_attach(struct ipsec_pcb *pcb)
1850{
1851 nexus_controller_t controller = kern_nexus_shared_controller();
1852 errno_t err = 0;
1853 ipsec_nx_t nx = &pcb->ipsec_nx;
1854
1855 // Allocate multistack flowswitch
1856 err = ipsec_create_fs_provider_and_instance(pcb,
1857 NEXUS_EXTENSION_FSW_TYPE_MULTISTACK,
1858 "multistack",
1859 pcb->ipsec_ifp->if_xname,
1860 &nx->ms_provider,
1861 &nx->ms_instance);
1862 if (err != 0) {
1863 printf("%s: failed to create multistack provider and instance\n",
1864 __func__);
1865 goto failed;
1866 }
1867
1868 // Attach multistack to device port
1869 err = kern_nexus_ifattach(controller, nx->ms_instance,
1870 NULL, nx->if_instance,
1871 FALSE, &nx->ms_device);
1872 if (err != 0) {
1873 printf("%s kern_nexus_ifattach ms device %d\n", __func__, err);
1874 goto failed;
1875 }
1876
1877 // Attach multistack to host port
1878 err = kern_nexus_ifattach(controller, nx->ms_instance,
1879 NULL, nx->if_instance,
1880 TRUE, &nx->ms_host);
1881 if (err != 0) {
1882 printf("%s kern_nexus_ifattach ms host %d\n", __func__, err);
1883 goto failed;
1884 }
1885
1886 // Extract the agent UUID and save for later
1887 struct kern_nexus *multistack_nx = nx_find(nx->ms_instance, false);
1888 if (multistack_nx != NULL) {
1889 struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(multistack_nx);
1890 if (flowswitch != NULL) {
1891 FSW_RLOCK(flowswitch);
1892 struct fsw_ms_context *ms_context = (struct fsw_ms_context *)flowswitch->fsw_ops_private;
1893 if (ms_context != NULL) {
1894 uuid_copy(nx->ms_agent, ms_context->mc_agent_uuid);
1895 } else {
1896 printf("ipsec_multistack_attach - fsw_ms_context is NULL\n");
1897 }
1898 FSW_UNLOCK(flowswitch);
1899 } else {
1900 printf("ipsec_multistack_attach - flowswitch is NULL\n");
1901 }
1902 nx_release(multistack_nx);
1903 } else {
1904 printf("ipsec_multistack_attach - unable to find multistack nexus\n");
1905 }
1906
1907 return (0);
1908
1909failed:
1910 ipsec_nexus_detach(pcb);
1911
1912 errno_t detach_error = 0;
1913 if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
1914 panic("ipsec_multistack_attach - ifnet_detach failed: %d\n", detach_error);
1915 /* NOT REACHED */
1916 }
1917
1918 return (err);
1919}
1920
1921#pragma mark Kernel Pipe Nexus
1922
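/*
 * The kernel pipe provider ("com.apple.nexus.ipsec.kpipe") is shared by all
 * ipsec interfaces: registration is reference counted under ipsec_lock, the
 * controller and provider are created on the first enable, and both are
 * destroyed when the last channel goes away (see
 * ipsec_unregister_kernel_pipe_nexus()).
 */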
1923static errno_t
1924ipsec_register_kernel_pipe_nexus(void)
1925{
1926 nexus_attr_t nxa = NULL;
1927 errno_t result;
1928
1929 lck_mtx_lock(&ipsec_lock);
1930 if (ipsec_ncd_refcount++) {
1931 lck_mtx_unlock(&ipsec_lock);
1932 return 0;
1933 }
1934
1935 result = kern_nexus_controller_create(&ipsec_ncd);
1936 if (result) {
1937 printf("%s: kern_nexus_controller_create failed: %d\n",
1938 __FUNCTION__, result);
1939 goto done;
1940 }
1941
1942 uuid_t dom_prov;
1943 result = kern_nexus_get_builtin_domain_provider(
1944 NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
1945 if (result) {
1946 printf("%s: kern_nexus_get_builtin_domain_provider failed: %d\n",
1947 __FUNCTION__, result);
1948 goto done;
1949 }
1950
1951 struct kern_nexus_provider_init prov_init = {
1952 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1953 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1954 .nxpi_pre_connect = ipsec_nexus_pre_connect,
1955 .nxpi_connected = ipsec_nexus_connected,
1956 .nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
1957 .nxpi_disconnected = ipsec_nexus_disconnected,
1958 .nxpi_ring_init = ipsec_kpipe_ring_init,
1959 .nxpi_ring_fini = ipsec_kpipe_ring_fini,
1960 .nxpi_slot_init = NULL,
1961 .nxpi_slot_fini = NULL,
1962 .nxpi_sync_tx = ipsec_kpipe_sync_tx,
1963 .nxpi_sync_rx = ipsec_kpipe_sync_rx,
1964 .nxpi_tx_doorbell = NULL,
1965 };
1966
1967 result = kern_nexus_attr_create(&nxa);
1968 if (result) {
1969 printf("%s: kern_nexus_attr_create failed: %d\n",
1970 __FUNCTION__, result);
1971 goto done;
1972 }
1973
1974 uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
1975 result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1976 VERIFY(result == 0);
1977
1978 // Reset ring size for kernel pipe nexus to limit memory usage
1979 uint64_t ring_size = if_ipsec_ring_size;
1980 result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1981 VERIFY(result == 0);
1982 result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1983 VERIFY(result == 0);
1984
1985 result = kern_nexus_controller_register_provider(ipsec_ncd,
1986 dom_prov,
1987 (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
1988 &prov_init,
1989 sizeof(prov_init),
1990 nxa,
1991 &ipsec_kpipe_uuid);
1992 if (result) {
1993 printf("%s: kern_nexus_controller_register_provider failed: %d\n",
1994 __FUNCTION__, result);
1995 goto done;
1996 }
1997
1998done:
1999 if (nxa) {
2000 kern_nexus_attr_destroy(nxa);
2001 }
2002
2003 if (result) {
2004 if (ipsec_ncd) {
2005 kern_nexus_controller_destroy(ipsec_ncd);
2006 ipsec_ncd = NULL;
2007 }
2008 ipsec_ncd_refcount = 0;
2009 }
2010
2011 lck_mtx_unlock(&ipsec_lock);
2012
2013 return result;
2014}
2015
2016static void
2017ipsec_unregister_kernel_pipe_nexus(void)
2018{
2019 lck_mtx_lock(&ipsec_lock);
2020
2021 VERIFY(ipsec_ncd_refcount > 0);
2022
2023 if (--ipsec_ncd_refcount == 0) {
2024 kern_nexus_controller_destroy(ipsec_ncd);
2025 ipsec_ncd = NULL;
2026 }
2027
2028 lck_mtx_unlock(&ipsec_lock);
2029}
2030
2031// For use by socket option, not internally
2032static errno_t
2033ipsec_disable_channel(struct ipsec_pcb *pcb)
2034{
2035 errno_t result;
2036 int enabled;
2037 uuid_t uuid;
2038
2039 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2040
2041 enabled = pcb->ipsec_kpipe_enabled;
2042 uuid_copy(uuid, pcb->ipsec_kpipe_uuid);
2043
2044 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid) == !enabled);
2045
2046 pcb->ipsec_kpipe_enabled = 0;
2047 uuid_clear(pcb->ipsec_kpipe_uuid);
2048
2049 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2050
2051 if (enabled) {
2052 result = kern_nexus_controller_free_provider_instance(ipsec_ncd, uuid);
2053 } else {
2054 result = ENXIO;
2055 }
2056
2057 if (!result) {
2058 if (pcb->ipsec_kpipe_pp != NULL) {
2059 kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
2060 pcb->ipsec_kpipe_pp = NULL;
2061 }
2062 ipsec_unregister_kernel_pipe_nexus();
2063 }
2064
2065 return result;
2066}
2067
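/*
 * Enabling a channel (reached via the IPSEC_OPT_ENABLE_CHANNEL setsockopt
 * below) performs, in order: register the shared kpipe provider, allocate a
 * per-pcb packet buffer pool sized from the netif ring, allocate a kpipe
 * provider instance backed by that pool, and bind the instance to the
 * calling process so only that pid may open the channel. Any failure
 * unwinds the pool and the shared provider refcount.
 */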
2068static errno_t
2069ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
2070{
2071 struct kern_nexus_init init;
2072 struct kern_pbufpool_init pp_init;
2073 errno_t result;
2074
2075 result = ipsec_register_kernel_pipe_nexus();
2076 if (result) {
2077 return result;
2078 }
2079
2080 VERIFY(ipsec_ncd);
2081
2082 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2083
2084 /* ipsec driver doesn't support channels without a netif */
2085 if (!pcb->ipsec_use_netif) {
2086 result = EOPNOTSUPP;
2087 goto done;
2088 }
2089
2090 if (pcb->ipsec_kpipe_enabled) {
2091 result = EEXIST; // return success instead?
2092 goto done;
2093 }
2094
2095 bzero(&pp_init, sizeof (pp_init));
2096 pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
2097 pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2;
2098 pp_init.kbi_bufsize = pcb->ipsec_slot_size;
2099 pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
2100 pp_init.kbi_max_frags = 1;
2101 pp_init.kbi_flags |= KBIF_QUANTUM;
2102 (void) snprintf((char *)pp_init.kbi_name, sizeof (pp_init.kbi_name),
2103 "com.apple.kpipe.%s", pcb->ipsec_if_xname);
2104
2105 result = kern_pbufpool_create(&pp_init, &pp_init, &pcb->ipsec_kpipe_pp,
2106 NULL);
2107 if (result != 0) {
2108 printf("%s pbufpool create failed, error %d\n", __func__, result);
2109 goto done;
2110 }
2111
2112 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid));
2113 bzero(&init, sizeof (init));
2114 init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
2115 init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;
2116 result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
2117 ipsec_kpipe_uuid, pcb, &pcb->ipsec_kpipe_uuid, &init);
2118 if (result) {
2119 goto done;
2120 }
2121
2122 nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
2123 result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
2124 pcb->ipsec_kpipe_uuid, &port,
2125 proc_pid(proc), NULL, NULL, 0, NEXUS_BIND_PID);
2126 if (result) {
2127 kern_nexus_controller_free_provider_instance(ipsec_ncd,
2128 pcb->ipsec_kpipe_uuid);
2129 uuid_clear(pcb->ipsec_kpipe_uuid);
2130 goto done;
2131 }
2132
2133 pcb->ipsec_kpipe_enabled = 1;
2134
2135done:
2136 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2137
2138 if (result) {
2139 if (pcb->ipsec_kpipe_pp != NULL) {
2140 kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
2141 pcb->ipsec_kpipe_pp = NULL;
2142 }
2143 ipsec_unregister_kernel_pipe_nexus();
2144 }
2145
2146 return result;
2147}
2148
2149#endif // IPSEC_NEXUS
2150
2151
2152/* Kernel control functions */
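/*
 * For context, a minimal userspace sketch of how these kernel-control
 * callbacks are reached (illustrative only, not part of this file; the
 * control name and option constants are assumed to come from
 * <net/if_ipsec.h>, and error handling is omitted):
 *
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <sys/sys_domain.h>
 *	#include <sys/kern_control.h>
 *	#include <net/if_ipsec.h>
 *	#include <string.h>
 *
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *	struct ctl_info info;
 *	memset(&info, 0, sizeof(info));
 *	strlcpy(info.ctl_name, IPSEC_CONTROL_NAME, sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);		// resolve the control id
 *	struct sockaddr_ctl sc;
 *	memset(&sc, 0, sizeof(sc));
 *	sc.sc_len = sizeof(sc);
 *	sc.sc_family = AF_SYSTEM;
 *	sc.ss_sysaddr = AF_SYS_CONTROL;
 *	sc.sc_id = info.ctl_id;
 *	sc.sc_unit = 0;				// 0 asks for the first free unit
 *	connect(fd, (struct sockaddr *)&sc, sizeof(sc));	// -> ipsec_ctl_connect()
 *
 * Options are then set or read with setsockopt()/getsockopt() at level
 * SYSPROTO_CONTROL; see ipsec_ctl_setopt() and ipsec_ctl_getopt() below.
 */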
2153
2154static inline void
2155ipsec_free_pcb(struct ipsec_pcb *pcb, bool in_list)
2156{
2157#if IPSEC_NEXUS
2158 mbuf_freem_list(pcb->ipsec_input_chain);
2159 lck_mtx_destroy(&pcb->ipsec_input_chain_lock, ipsec_lck_grp);
2160#endif // IPSEC_NEXUS
2161 lck_rw_destroy(&pcb->ipsec_pcb_lock, ipsec_lck_grp);
2162 if (in_list) {
2163 lck_mtx_lock(&ipsec_lock);
2164 TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
2165 lck_mtx_unlock(&ipsec_lock);
2166 }
2167 zfree(ipsec_pcb_zone, pcb);
2168}
2169
2170static errno_t
2171ipsec_ctl_bind(kern_ctl_ref kctlref,
2172 struct sockaddr_ctl *sac,
2173 void **unitinfo)
2174{
2175 struct ipsec_pcb *pcb = zalloc(ipsec_pcb_zone);
2176 memset(pcb, 0, sizeof(*pcb));
2177
2178 /* Setup the protocol control block */
2179 *unitinfo = pcb;
2180 pcb->ipsec_ctlref = kctlref;
2181 pcb->ipsec_unit = sac->sc_unit;
2182 pcb->ipsec_output_service_class = MBUF_SC_OAM;
2183
2184#if IPSEC_NEXUS
2185 pcb->ipsec_use_netif = false;
2186 pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
2187 pcb->ipsec_netif_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
2188 pcb->ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
2189 pcb->ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
2190#endif // IPSEC_NEXUS
2191
2192 lck_rw_init(&pcb->ipsec_pcb_lock, ipsec_lck_grp, ipsec_lck_attr);
2193#if IPSEC_NEXUS
2194 lck_mtx_init(&pcb->ipsec_input_chain_lock, ipsec_lck_grp, ipsec_lck_attr);
2195#endif // IPSEC_NEXUS
2196
2197 return (0);
2198}
2199
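/*
 * Connect: assign the pcb a unique id (scanning for a gap only after the
 * 32-bit counter wraps), insert it into the sorted pcb list, then create
 * and attach the ifnet. With IPSEC_OPT_ENABLE_NETIF set before connect, the
 * interface is created Skywalk-native and wired to a netif/multistack
 * nexus; otherwise it is a classic ifnet driven by ipsec_start(). Either
 * way it ends up marked IFF_RUNNING.
 */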
2200static errno_t
2201ipsec_ctl_connect(kern_ctl_ref kctlref,
2202 struct sockaddr_ctl *sac,
2203 void **unitinfo)
2204{
2205 struct ifnet_init_eparams ipsec_init = {};
2206 errno_t result = 0;
2207
2208 if (*unitinfo == NULL) {
2209 (void)ipsec_ctl_bind(kctlref, sac, unitinfo);
2210 }
2211
2212 struct ipsec_pcb *pcb = *unitinfo;
2213
2214 lck_mtx_lock(&ipsec_lock);
2215
2216 /* Find some open interface id */
2217 u_int32_t chosen_unique_id = 1;
2218 struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
2219 if (next_pcb != NULL) {
2220 /* List was not empty, add one to the last item */
2221 chosen_unique_id = next_pcb->ipsec_unique_id + 1;
2222 next_pcb = NULL;
2223
2224 /*
2225 * If this wrapped the id number, start looking at
2226 * the front of the list for an unused id.
2227 */
2228 if (chosen_unique_id == 0) {
2229 /* Find the next unused ID */
2230 chosen_unique_id = 1;
2231 TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
2232 if (next_pcb->ipsec_unique_id > chosen_unique_id) {
2233 /* We found a gap */
2234 break;
2235 }
2236
2237 chosen_unique_id = next_pcb->ipsec_unique_id + 1;
2238 }
2239 }
2240 }
2241
2242 pcb->ipsec_unique_id = chosen_unique_id;
2243
2244 if (next_pcb != NULL) {
2245 TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
2246 } else {
2247 TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
2248 }
2249 lck_mtx_unlock(&ipsec_lock);
2250
2251 snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
2252 snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
2253 printf("ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);
2254
2255 /* Create the interface */
2256 bzero(&ipsec_init, sizeof(ipsec_init));
2257 ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
2258 ipsec_init.len = sizeof (ipsec_init);
2259
2260#if IPSEC_NEXUS
2261 if (pcb->ipsec_use_netif) {
2262 ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
2263 } else
2264#endif // IPSEC_NEXUS
2265 {
2266 ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
2267 ipsec_init.start = ipsec_start;
2268 }
2269 ipsec_init.name = "ipsec";
2270 ipsec_init.unit = pcb->ipsec_unit - 1;
2271 ipsec_init.uniqueid = pcb->ipsec_unique_name;
2272 ipsec_init.uniqueid_len = strlen(pcb->ipsec_unique_name);
2273 ipsec_init.family = ipsec_family;
2274 ipsec_init.subfamily = IFNET_SUBFAMILY_IPSEC;
2275 ipsec_init.type = IFT_OTHER;
2276 ipsec_init.demux = ipsec_demux;
2277 ipsec_init.add_proto = ipsec_add_proto;
2278 ipsec_init.del_proto = ipsec_del_proto;
2279 ipsec_init.softc = pcb;
2280 ipsec_init.ioctl = ipsec_ioctl;
2281 ipsec_init.detach = ipsec_detached;
2282
2283#if IPSEC_NEXUS
2284 if (pcb->ipsec_use_netif) {
2285 result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
2286 if (result != 0) {
2287 printf("ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
2288 ipsec_free_pcb(pcb, true);
2289 *unitinfo = NULL;
2290 return result;
2291 }
2292
2293 result = ipsec_multistack_attach(pcb);
2294 if (result != 0) {
2295 printf("ipsec_ctl_connect - ipsec_multistack_attach failed: %d\n", result);
2296 *unitinfo = NULL;
2297 return result;
2298 }
2299
2300 /* Attach to bpf */
2301 bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
2302 } else
2303#endif // IPSEC_NEXUS
2304 {
2305 result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
2306 if (result != 0) {
2307 printf("ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
2308 ipsec_free_pcb(pcb, true);
2309 *unitinfo = NULL;
2310 return result;
2311 }
2312 ipsec_ifnet_set_attrs(pcb->ipsec_ifp);
2313
2314 /* Attach the interface */
2315 result = ifnet_attach(pcb->ipsec_ifp, NULL);
2316 if (result != 0) {
2317 printf("ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
2318 ifnet_release(pcb->ipsec_ifp);
2319 ipsec_free_pcb(pcb, true);
2320 *unitinfo = NULL;
2321 return (result);
2322 }
2323
2324 /* Attach to bpf */
2325 bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
2326 }
2327
2328 /* The interface's resources are allocated; mark it as running */
2329 ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);
2330
2331 return (0);
2332}
2333
2334static errno_t
2335ipsec_detach_ip(ifnet_t interface,
2336 protocol_family_t protocol,
2337 socket_t pf_socket)
2338{
2339 errno_t result = EPROTONOSUPPORT;
2340
2341 /* Attempt a detach */
2342 if (protocol == PF_INET) {
2343 struct ifreq ifr;
2344
2345 bzero(&ifr, sizeof(ifr));
2346 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2347 ifnet_name(interface), ifnet_unit(interface));
2348
2349 result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
2350 }
2351 else if (protocol == PF_INET6) {
2352 struct in6_ifreq ifr6;
2353
2354 bzero(&ifr6, sizeof(ifr6));
2355 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2356 ifnet_name(interface), ifnet_unit(interface));
2357
2358 result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2359 }
2360
2361 return result;
2362}
2363
2364static void
2365ipsec_remove_address(ifnet_t interface,
2366 protocol_family_t protocol,
2367 ifaddr_t address,
2368 socket_t pf_socket)
2369{
2370 errno_t result = 0;
2371
2372 /* Attempt a detach */
2373 if (protocol == PF_INET) {
2374 struct ifreq ifr;
2375
2376 bzero(&ifr, sizeof(ifr));
2377 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2378 ifnet_name(interface), ifnet_unit(interface));
2379 result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2380 if (result != 0) {
2381 printf("ipsec_remove_address - ifaddr_address failed: %d", result);
2382 }
2383 else {
2384 result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2385 if (result != 0) {
2386 printf("ipsec_remove_address - SIOCDIFADDR failed: %d", result);
2387 }
2388 }
2389 }
2390 else if (protocol == PF_INET6) {
2391 struct in6_ifreq ifr6;
2392
2393 bzero(&ifr6, sizeof(ifr6));
2394 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2395 ifnet_name(interface), ifnet_unit(interface));
2396 result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
2397 sizeof(ifr6.ifr_addr));
2398 if (result != 0) {
2399 printf("ipsec_remove_address - ifaddr_address failed (v6): %d",
2400 result);
2401 }
2402 else {
2403 result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2404 if (result != 0) {
2405 printf("ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
2406 result);
2407 }
2408 }
2409 }
2410}
2411
2412static void
2413ipsec_cleanup_family(ifnet_t interface,
2414 protocol_family_t protocol)
2415{
2416 errno_t result = 0;
2417 socket_t pf_socket = NULL;
2418 ifaddr_t *addresses = NULL;
2419 int i;
2420
2421 if (protocol != PF_INET && protocol != PF_INET6) {
2422 printf("ipsec_cleanup_family - invalid protocol family %d\n", protocol);
2423 return;
2424 }
2425
2426 /* Create a socket for removing addresses and detaching the protocol */
2427 result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2428 if (result != 0) {
2429 if (result != EAFNOSUPPORT)
2430 printf("ipsec_cleanup_family - failed to create %s socket: %d\n",
2431 protocol == PF_INET ? "IP" : "IPv6", result);
2432 goto cleanup;
2433 }
2434
2435 /* always set SS_PRIV, we want to close and detach regardless */
2436 sock_setpriv(pf_socket, 1);
2437
2438 result = ipsec_detach_ip(interface, protocol, pf_socket);
2439 if (result == 0 || result == ENXIO) {
2440 /* We are done! We either detached or weren't attached. */
2441 goto cleanup;
2442 }
2443 else if (result != EBUSY) {
2444 /* Unexpected error from the detach attempt; log it and bail out to cleanup. */
2445 printf("ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2446 goto cleanup;
2447 }
2448
2449 /*
2450 * At this point, we received an EBUSY error. This means there are
2451 * addresses attached. We should detach them and then try again.
2452 */
2453 result = ifnet_get_address_list_family(interface, &addresses, protocol);
2454 if (result != 0) {
2455 printf("fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2456 ifnet_name(interface), ifnet_unit(interface),
2457 protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
2458 goto cleanup;
2459 }
2460
2461 for (i = 0; addresses[i] != 0; i++) {
2462 ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
2463 }
2464 ifnet_free_address_list(addresses);
2465 addresses = NULL;
2466
2467 /*
2468 * The addresses should be gone, we should try the remove again.
2469 */
2470 result = ipsec_detach_ip(interface, protocol, pf_socket);
2471 if (result != 0 && result != ENXIO) {
2472 printf("ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2473 }
2474
2475cleanup:
2476 if (pf_socket != NULL)
2477 sock_close(pf_socket);
2478
2479 if (addresses != NULL)
2480 ifnet_free_address_list(addresses);
2481}
2482
2483static errno_t
2484ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref,
2485 __unused u_int32_t unit,
2486 void *unitinfo)
2487{
2488 struct ipsec_pcb *pcb = unitinfo;
2489 ifnet_t ifp = NULL;
2490 errno_t result = 0;
2491
2492 if (pcb == NULL) {
2493 return EINVAL;
2494 }
2495
2496#if IPSEC_NEXUS
2497 // Tell the nexus to stop all rings
2498 if (pcb->ipsec_netif_nexus != NULL) {
2499 kern_nexus_stop(pcb->ipsec_netif_nexus);
2500 }
2501#endif // IPSEC_NEXUS
2502
2503 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2504
2505#if IPSEC_NEXUS
2506 uuid_t kpipe_uuid;
2507 uuid_copy(kpipe_uuid, pcb->ipsec_kpipe_uuid);
2508 uuid_clear(pcb->ipsec_kpipe_uuid);
2509 pcb->ipsec_kpipe_enabled = FALSE;
2510#endif // IPSEC_NEXUS
2511
2512 pcb->ipsec_ctlref = NULL;
2513
2514 ifp = pcb->ipsec_ifp;
2515 if (ifp != NULL) {
2516#if IPSEC_NEXUS
2517 if (pcb->ipsec_netif_nexus != NULL) {
2518 /*
2519 * Quiesce the interface and flush any pending outbound packets.
2520 */
2521 if_down(ifp);
2522
2523 /* Increment refcnt, but detach interface */
2524 ifnet_incr_iorefcnt(ifp);
2525 if ((result = ifnet_detach(ifp)) != 0) {
2526 panic("ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
2527 /* NOT REACHED */
2528 }
2529
2530 /*
2531 * We want to do everything in our power to ensure that the interface
2532 * really goes away when the socket is closed. We must remove IP/IPv6
2533 * addresses and detach the protocols. Finally, we can remove and
2534 * release the interface.
2535 */
2536 key_delsp_for_ipsec_if(ifp);
2537
2538 ipsec_cleanup_family(ifp, AF_INET);
2539 ipsec_cleanup_family(ifp, AF_INET6);
2540
2541 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2542
2543 if (!uuid_is_null(kpipe_uuid)) {
2544 if (kern_nexus_controller_free_provider_instance(ipsec_ncd, kpipe_uuid) == 0) {
2545 if (pcb->ipsec_kpipe_pp != NULL) {
2546 kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
2547 pcb->ipsec_kpipe_pp = NULL;
2548 }
2549 ipsec_unregister_kernel_pipe_nexus();
2550 }
2551 }
2552 ipsec_nexus_detach(pcb);
2553
2554 /* Decrement refcnt to finish detaching and freeing */
2555 ifnet_decr_iorefcnt(ifp);
2556 } else
2557#endif // IPSEC_NEXUS
2558 {
2559 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2560
2561#if IPSEC_NEXUS
2562 if (!uuid_is_null(kpipe_uuid)) {
2563 if (kern_nexus_controller_free_provider_instance(ipsec_ncd, kpipe_uuid) == 0) {
2564 if (pcb->ipsec_kpipe_pp != NULL) {
2565 kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
2566 pcb->ipsec_kpipe_pp = NULL;
2567 }
2568 ipsec_unregister_kernel_pipe_nexus();
2569 }
2570 }
2571#endif // IPSEC_NEXUS
2572
2573 /*
2574 * We want to do everything in our power to ensure that the interface
2575 * really goes away when the socket is closed. We must remove IP/IPv6
2576 * addresses and detach the protocols. Finally, we can remove and
2577 * release the interface.
2578 */
2579 key_delsp_for_ipsec_if(ifp);
2580
2581 ipsec_cleanup_family(ifp, AF_INET);
2582 ipsec_cleanup_family(ifp, AF_INET6);
2583
2584 /*
2585 * Detach now; ipsec_detach() will be called asynchronously once
2586 * the I/O reference count drops to 0. There we will invoke
2587 * ifnet_release().
2588 */
2589 if ((result = ifnet_detach(ifp)) != 0) {
2590 printf("ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
2591 }
2592 }
2593 } else {
2594 // Bound, but not connected
2595 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2596 ipsec_free_pcb(pcb, false);
2597 }
2598
2599 return 0;
2600}
2601
2602static errno_t
2603ipsec_ctl_send(__unused kern_ctl_ref kctlref,
2604 __unused u_int32_t unit,
2605 __unused void *unitinfo,
2606 mbuf_t m,
2607 __unused int flags)
2608{
2609 /* Receive messages from the control socket. Currently unused. */
2610 mbuf_freem(m);
2611 return 0;
2612}
2613
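/*
 * Continuing the control-socket sketch above (illustrative only; constants
 * assumed from <net/if_ipsec.h>): options that shape the nexus, such as
 * IPSEC_OPT_ENABLE_NETIF and IPSEC_OPT_SLOT_SIZE, must be set before
 * connect(), while per-interface options such as IPSEC_OPT_ENABLE_CHANNEL
 * are only accepted once the interface exists.
 *
 *	int on = 1;
 *	setsockopt(fd, SYSPROTO_CONTROL, IPSEC_OPT_ENABLE_NETIF,
 *	    &on, sizeof(on));			// before connect(fd, ...)
 */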
2614static errno_t
2615ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,
2616 __unused u_int32_t unit,
2617 void *unitinfo,
2618 int opt,
2619 void *data,
2620 size_t len)
2621{
2622 struct ipsec_pcb *pcb = unitinfo;
2623 errno_t result = 0;
2624
2625 /* check for privileges for privileged options */
2626 switch (opt) {
2627 case IPSEC_OPT_FLAGS:
2628 case IPSEC_OPT_EXT_IFDATA_STATS:
2629 case IPSEC_OPT_SET_DELEGATE_INTERFACE:
2630 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
2631 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
2632 return EPERM;
2633 }
2634 break;
2635 }
2636
2637 switch (opt) {
2638 case IPSEC_OPT_FLAGS:
2639 if (len != sizeof(u_int32_t)) {
2640 result = EMSGSIZE;
2641 } else {
2642 pcb->ipsec_flags = *(u_int32_t *)data;
2643 }
2644 break;
2645
2646 case IPSEC_OPT_EXT_IFDATA_STATS:
2647 if (len != sizeof(int)) {
2648 result = EMSGSIZE;
2649 break;
2650 }
2651 if (pcb->ipsec_ifp == NULL) {
2652 // Only can set after connecting
2653 result = EINVAL;
2654 break;
2655 }
2656 pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
2657 break;
2658
2659 case IPSEC_OPT_INC_IFDATA_STATS_IN:
2660 case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
2661 struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
2662
2663 if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
2664 result = EINVAL;
2665 break;
2666 }
2667 if (pcb->ipsec_ifp == NULL) {
2668 // Only can set after connecting
2669 result = EINVAL;
2670 break;
2671 }
2672 if (!pcb->ipsec_ext_ifdata_stats) {
2673 result = EINVAL;
2674 break;
2675 }
2676 if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN)
2677 ifnet_stat_increment_in(pcb->ipsec_ifp, utsp->utsp_packets,
2678 utsp->utsp_bytes, utsp->utsp_errors);
2679 else
2680 ifnet_stat_increment_out(pcb->ipsec_ifp, utsp->utsp_packets,
2681 utsp->utsp_bytes, utsp->utsp_errors);
2682 break;
2683 }
2684
2685 case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
2686 ifnet_t del_ifp = NULL;
2687 char name[IFNAMSIZ];
2688
2689 if (len > IFNAMSIZ - 1) {
2690 result = EMSGSIZE;
2691 break;
2692 }
2693 if (pcb->ipsec_ifp == NULL) {
2694 // Only can set after connecting
2695 result = EINVAL;
2696 break;
2697 }
2698 if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */
2699 bcopy(data, name, len);
2700 name[len] = 0;
2701 result = ifnet_find_by_name(name, &del_ifp);
2702 }
2703 if (result == 0) {
2704 printf("%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
2705 __func__, pcb->ipsec_ifp->if_xname,
2706 del_ifp->if_xname);
2707
2708 result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
2709 if (del_ifp)
2710 ifnet_release(del_ifp);
2711 }
2712 break;
2713 }
2714
2715 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
2716 if (len != sizeof(int)) {
2717 result = EMSGSIZE;
2718 break;
2719 }
2720 if (pcb->ipsec_ifp == NULL) {
2721 // Only can set after connecting
2722 result = EINVAL;
2723 break;
2724 }
2725 mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
2726 if (output_service_class == MBUF_SC_UNSPEC) {
2727 pcb->ipsec_output_service_class = MBUF_SC_OAM;
2728 } else {
2729 pcb->ipsec_output_service_class = output_service_class;
2730 }
2731 printf("%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
2732 __func__, pcb->ipsec_ifp->if_xname,
2733 pcb->ipsec_output_service_class);
2734 break;
2735 }
2736
2737#if IPSEC_NEXUS
2738 case IPSEC_OPT_ENABLE_CHANNEL: {
2739 if (len != sizeof(int)) {
2740 result = EMSGSIZE;
2741 break;
2742 }
2743 if (pcb->ipsec_ifp == NULL) {
2744 // Only can set after connecting
2745 result = EINVAL;
2746 break;
2747 }
2748 if (*(int *)data) {
2749 result = ipsec_enable_channel(pcb, current_proc());
2750 } else {
2751 result = ipsec_disable_channel(pcb);
2752 }
2753 break;
2754 }
2755
2756 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
2757 if (len != sizeof(int)) {
2758 result = EMSGSIZE;
2759 break;
2760 }
2761 if (pcb->ipsec_ifp == NULL) {
2762 // Only can set after connecting
2763 result = EINVAL;
2764 break;
2765 }
2766 if (!if_is_netagent_enabled()) {
2767 result = ENOTSUP;
2768 break;
2769 }
2770 if (uuid_is_null(pcb->ipsec_nx.ms_agent)) {
2771 result = ENOENT;
2772 break;
2773 }
2774
2775 if (*(int *)data) {
2776 if_add_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.ms_agent);
2777 pcb->ipsec_needs_netagent = true;
2778 } else {
2779 pcb->ipsec_needs_netagent = false;
2780 if_delete_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.ms_agent);
2781 }
2782 break;
2783 }
2784
2785 case IPSEC_OPT_INPUT_FRAG_SIZE: {
2786 if (len != sizeof(u_int32_t)) {
2787 result = EMSGSIZE;
2788 break;
2789 }
2790 u_int32_t input_frag_size = *(u_int32_t *)data;
2791 if (input_frag_size <= sizeof(struct ip6_hdr)) {
2792 pcb->ipsec_frag_size_set = FALSE;
2793 pcb->ipsec_input_frag_size = 0;
2794 } else {
2795 printf("SET FRAG SIZE TO %u\n", input_frag_size);
2796 pcb->ipsec_frag_size_set = TRUE;
2797 pcb->ipsec_input_frag_size = input_frag_size;
2798 }
2799 break;
2800 }
2801 case IPSEC_OPT_ENABLE_NETIF: {
2802 if (len != sizeof(int)) {
2803 result = EMSGSIZE;
2804 break;
2805 }
2806 if (pcb->ipsec_ifp != NULL) {
2807 // Only can set before connecting
2808 result = EINVAL;
2809 break;
2810 }
2811 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2812 pcb->ipsec_use_netif = !!(*(int *)data);
2813 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2814 break;
2815 }
2816 case IPSEC_OPT_SLOT_SIZE: {
2817 if (len != sizeof(u_int32_t)) {
2818 result = EMSGSIZE;
2819 break;
2820 }
2821 if (pcb->ipsec_ifp != NULL) {
2822 // Only can set before connecting
2823 result = EINVAL;
2824 break;
2825 }
2826 u_int32_t slot_size = *(u_int32_t *)data;
2827 if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
2828 slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
2829 return (EINVAL);
2830 }
2831 pcb->ipsec_slot_size = slot_size;
2832 break;
2833 }
2834 case IPSEC_OPT_NETIF_RING_SIZE: {
2835 if (len != sizeof(u_int32_t)) {
2836 result = EMSGSIZE;
2837 break;
2838 }
2839 if (pcb->ipsec_ifp != NULL) {
2840 // Only can set before connecting
2841 result = EINVAL;
2842 break;
2843 }
2844 u_int32_t ring_size = *(u_int32_t *)data;
2845 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
2846 ring_size > IPSEC_IF_MAX_RING_SIZE) {
2847 return (EINVAL);
2848 }
2849 pcb->ipsec_netif_ring_size = ring_size;
2850 break;
2851 }
2852 case IPSEC_OPT_TX_FSW_RING_SIZE: {
2853 if (len != sizeof(u_int32_t)) {
2854 result = EMSGSIZE;
2855 break;
2856 }
2857 if (pcb->ipsec_ifp != NULL) {
2858 // Only can set before connecting
2859 result = EINVAL;
2860 break;
2861 }
2862 u_int32_t ring_size = *(u_int32_t *)data;
2863 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
2864 ring_size > IPSEC_IF_MAX_RING_SIZE) {
2865 return (EINVAL);
2866 }
2867 pcb->ipsec_tx_fsw_ring_size = ring_size;
2868 break;
2869 }
2870 case IPSEC_OPT_RX_FSW_RING_SIZE: {
2871 if (len != sizeof(u_int32_t)) {
2872 result = EMSGSIZE;
2873 break;
2874 }
2875 if (pcb->ipsec_ifp != NULL) {
2876 // Only can set before connecting
2877 result = EINVAL;
2878 break;
2879 }
2880 u_int32_t ring_size = *(u_int32_t *)data;
2881 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
2882 ring_size > IPSEC_IF_MAX_RING_SIZE) {
2883 return (EINVAL);
2884 }
2885 pcb->ipsec_rx_fsw_ring_size = ring_size;
2886 break;
2887 }
2888
2889#endif // IPSEC_NEXUS
2890
2891 default:
2892 result = ENOPROTOOPT;
2893 break;
2894 }
2895
2896 return result;
2897}
2898
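/*
 * Continuing the sketch (illustrative only; uuid_t from <uuid/uuid.h>):
 * once a kernel pipe channel has been enabled, its instance UUID can be
 * read back, e.g. to open the channel from userspace:
 *
 *	uuid_t ch_uuid;
 *	socklen_t uuid_len = sizeof(ch_uuid);
 *	getsockopt(fd, SYSPROTO_CONTROL, IPSEC_OPT_GET_CHANNEL_UUID,
 *	    ch_uuid, &uuid_len);
 */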
2899static errno_t
2900ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
2901 __unused u_int32_t unit,
2902 void *unitinfo,
2903 int opt,
2904 void *data,
2905 size_t *len)
2906{
2907 struct ipsec_pcb *pcb = unitinfo;
2908 errno_t result = 0;
2909
2910 switch (opt) {
2911 case IPSEC_OPT_FLAGS: {
2912 if (*len != sizeof(u_int32_t)) {
2913 result = EMSGSIZE;
2914 } else {
2915 *(u_int32_t *)data = pcb->ipsec_flags;
2916 }
2917 break;
2918 }
2919
2920 case IPSEC_OPT_EXT_IFDATA_STATS: {
2921 if (*len != sizeof(int)) {
2922 result = EMSGSIZE;
2923 } else {
2924 *(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
2925 }
2926 break;
2927 }
2928
2929 case IPSEC_OPT_IFNAME: {
2930 if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) {
2931 result = EMSGSIZE;
2932 } else {
2933 if (pcb->ipsec_ifp == NULL) {
2934 // Only can get after connecting
2935 result = EINVAL;
2936 break;
2937 }
2938 *len = snprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
2939 }
2940 break;
2941 }
2942
2943 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
2944 if (*len != sizeof(int)) {
2945 result = EMSGSIZE;
2946 } else {
2947 *(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
2948 }
2949 break;
2950 }
2951
2952#if IPSEC_NEXUS
2953
2954 case IPSEC_OPT_ENABLE_CHANNEL: {
2955 if (*len != sizeof(int)) {
2956 result = EMSGSIZE;
2957 } else {
2958 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2959 *(int *)data = pcb->ipsec_kpipe_enabled;
2960 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2961 }
2962 break;
2963 }
2964
2965 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
2966 if (*len != sizeof(int)) {
2967 result = EMSGSIZE;
2968 } else {
2969 *(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.ms_agent);
2970 }
2971 break;
2972 }
2973
2974 case IPSEC_OPT_ENABLE_NETIF: {
2975 if (*len != sizeof(int)) {
2976 result = EMSGSIZE;
2977 } else {
2978 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2979 *(int *)data = !!pcb->ipsec_use_netif;
2980 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2981 }
2982 break;
2983 }
2984
2985 case IPSEC_OPT_GET_CHANNEL_UUID: {
2986 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2987 if (uuid_is_null(pcb->ipsec_kpipe_uuid)) {
2988 result = ENXIO;
2989 } else if (*len != sizeof(uuid_t)) {
2990 result = EMSGSIZE;
2991 } else {
2992 uuid_copy(data, pcb->ipsec_kpipe_uuid);
2993 }
2994 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2995 break;
2996 }
2997
2998 case IPSEC_OPT_INPUT_FRAG_SIZE: {
2999 if (*len != sizeof(u_int32_t)) {
3000 result = EMSGSIZE;
3001 } else {
3002 *(u_int32_t *)data = pcb->ipsec_input_frag_size;
3003 }
3004 break;
3005 }
3006 case IPSEC_OPT_SLOT_SIZE: {
3007 if (*len != sizeof(u_int32_t)) {
3008 result = EMSGSIZE;
3009 } else {
3010 *(u_int32_t *)data = pcb->ipsec_slot_size;
3011 }
3012 break;
3013 }
3014 case IPSEC_OPT_NETIF_RING_SIZE: {
3015 if (*len != sizeof(u_int32_t)) {
3016 result = EMSGSIZE;
3017 } else {
3018 *(u_int32_t *)data = pcb->ipsec_netif_ring_size;
3019 }
3020 break;
3021 }
3022 case IPSEC_OPT_TX_FSW_RING_SIZE: {
3023 if (*len != sizeof(u_int32_t)) {
3024 result = EMSGSIZE;
3025 } else {
3026 *(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
3027 }
3028 break;
3029 }
3030 case IPSEC_OPT_RX_FSW_RING_SIZE: {
3031 if (*len != sizeof(u_int32_t)) {
3032 result = EMSGSIZE;
3033 } else {
3034 *(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
3035 }
3036 break;
3037 }
3038
3039#endif // IPSEC_NEXUS
3040
3041 default: {
3042 result = ENOPROTOOPT;
3043 break;
3044 }
3045 }
3046
3047 return result;
3048}
3049
3050/* Network Interface functions */
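/*
 * mbuf output path: the packet is (in the non-netif case) tapped to bpf,
 * encrypted in place via ipsec4/6_interface_output(), stamped with the
 * configured service class and the interface flow hash, and handed to
 * ip_output()/ip6_output() with explicit out-args (IP_NOIPSEC on the v4
 * path keeps it from being encrypted twice). A flow advisory of
 * FLOW_CONTROLLED or SUSPENDED maps to ENOBUFS and disables further output
 * on the interface.
 */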
3051static errno_t
3052ipsec_output(ifnet_t interface,
3053 mbuf_t data)
3054{
3055 struct ipsec_pcb *pcb = ifnet_softc(interface);
3056 struct ipsec_output_state ipsec_state;
3057 struct route ro;
3058 struct route_in6 ro6;
3059 int length;
3060 struct ip *ip;
3061 struct ip6_hdr *ip6;
3062 struct ip_out_args ipoa;
3063 struct ip6_out_args ip6oa;
3064 int error = 0;
3065 u_int ip_version = 0;
3066 int flags = 0;
3067 struct flowadv *adv = NULL;
3068
3069 // Make sure this packet isn't looping through the interface
3070 if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
3071 error = EINVAL;
3072 goto ipsec_output_err;
3073 }
3074
3075 // Mark the interface so NECP can evaluate tunnel policy
3076 necp_mark_packet_from_interface(data, interface);
3077
3078 ip = mtod(data, struct ip *);
3079 ip_version = ip->ip_v;
3080
3081 switch (ip_version) {
3082 case 4: {
3083#if IPSEC_NEXUS
3084 if (!pcb->ipsec_use_netif)
3085#endif // IPSEC_NEXUS
3086 {
3087 int af = AF_INET;
3088 bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
3089 }
3090
3091 /* Apply encryption */
3092 memset(&ipsec_state, 0, sizeof(ipsec_state));
3093 ipsec_state.m = data;
3094 ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
3095 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
3096
3097 error = ipsec4_interface_output(&ipsec_state, interface);
3098 /* Tunneled in IPv6 - packet is gone */
3099 if (error == 0 && ipsec_state.tunneled == 6) {
3100 goto done;
3101 }
3102
3103 data = ipsec_state.m;
3104 if (error || data == NULL) {
3105 if (error) {
3106 printf("ipsec_output: ipsec4_output error %d.\n", error);
3107 }
3108 goto ipsec_output_err;
3109 }
3110
3111 /* Set traffic class, set flow */
3112 m_set_service_class(data, pcb->ipsec_output_service_class);
3113 data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
3114 data->m_pkthdr.pkt_flowid = interface->if_flowhash;
3115 data->m_pkthdr.pkt_proto = ip->ip_p;
3116 data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
3117
3118 /* Flip endian-ness for ip_output */
3119 ip = mtod(data, struct ip *);
3120 NTOHS(ip->ip_len);
3121 NTOHS(ip->ip_off);
3122
3123 /* Increment statistics */
3124 length = mbuf_pkthdr_len(data);
3125 ifnet_stat_increment_out(interface, 1, length, 0);
3126
3127 /* Send to ip_output */
3128 memset(&ro, 0, sizeof(ro));
3129
3130 flags = (IP_OUTARGS | /* Passing out args to specify interface */
3131 IP_NOIPSEC); /* To ensure the packet doesn't go through ipsec twice */
3132
3133 memset(&ipoa, 0, sizeof(ipoa));
3134 ipoa.ipoa_flowadv.code = 0;
3135 ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
3136 if (ipsec_state.outgoing_if) {
3137 ipoa.ipoa_boundif = ipsec_state.outgoing_if;
3138 ipoa.ipoa_flags |= IPOAF_BOUND_IF;
3139 }
3140 ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);
3141
3142 adv = &ipoa.ipoa_flowadv;
3143
3144 (void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
3145 data = NULL;
3146
3147 if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
3148 error = ENOBUFS;
3149 ifnet_disable_output(interface);
3150 }
3151
3152 goto done;
3153 }
3154 case 6: {
3155#if IPSEC_NEXUS
3156 if (!pcb->ipsec_use_netif)
3157#endif // IPSEC_NEXUS
3158 {
3159 int af = AF_INET6;
3160 bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
3161 }
3162
3163 data = ipsec6_splithdr(data);
3164 if (data == NULL) {
3165 printf("ipsec_output: ipsec6_splithdr returned NULL\n");
3166 goto ipsec_output_err;
3167 }
3168
3169 ip6 = mtod(data, struct ip6_hdr *);
3170
3171 memset(&ipsec_state, 0, sizeof(ipsec_state));
3172 ipsec_state.m = data;
3173 ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
3174 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
3175
3176 error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
3177 if (error == 0 && ipsec_state.tunneled == 4) { /* tunneled in IPv4 - packet is gone */
3178 goto done;
3179 }
3180 data = ipsec_state.m;
3181 if (error || data == NULL) {
3182 if (error) {
3183 printf("ipsec_output: ipsec6_output error %d\n", error);
3184 }
3185 goto ipsec_output_err;
3186 }
3187
3188 /* Set traffic class, set flow */
3189 m_set_service_class(data, pcb->ipsec_output_service_class);
3190 data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
3191 data->m_pkthdr.pkt_flowid = interface->if_flowhash;
3192 data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
3193 data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
3194
3195 /* Increment statistics */
3196 length = mbuf_pkthdr_len(data);
3197 ifnet_stat_increment_out(interface, 1, length, 0);
3198
3199 /* Send to ip6_output */
3200 memset(&ro6, 0, sizeof(ro6));
3201
3202 flags = IPV6_OUTARGS;
3203
3204 memset(&ip6oa, 0, sizeof(ip6oa));
3205 ip6oa.ip6oa_flowadv.code = 0;
3206 ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
3207 if (ipsec_state.outgoing_if) {
3208 ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
3209 ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
3210 }
3211 ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);
3212
3213 adv = &ip6oa.ip6oa_flowadv;
3214
3215 (void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
3216 data = NULL;
3217
3218 if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
3219 error = ENOBUFS;
3220 ifnet_disable_output(interface);
3221 }
3222
3223 goto done;
3224 }
3225 default: {
3226 printf("ipsec_output: Received unknown packet version %d.\n", ip_version);
3227 error = EINVAL;
3228 goto ipsec_output_err;
3229 }
3230 }
3231
3232done:
3233 return error;
3234
3235ipsec_output_err:
3236 if (data)
3237 mbuf_freem(data);
3238 goto done;
3239}
3240
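/*
 * Start callback for the classic (non-netif) interface: drain the ifnet
 * send queue and push each packet through ipsec_output(); a non-zero
 * return (for example ENOBUFS under flow control) stops the drain until
 * the stack calls start again.
 */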
3241static void
3242ipsec_start(ifnet_t interface)
3243{
3244 mbuf_t data;
3245 struct ipsec_pcb *pcb = ifnet_softc(interface);
3246
3247 VERIFY(pcb != NULL);
3248 for (;;) {
3249 if (ifnet_dequeue(interface, &data) != 0)
3250 break;
3251 if (ipsec_output(interface, data) != 0)
3252 break;
3253 }
3254}
3255
3256/* Network Interface functions */
3257static errno_t
3258ipsec_demux(__unused ifnet_t interface,
3259 mbuf_t data,
3260 __unused char *frame_header,
3261 protocol_family_t *protocol)
3262{
3263 struct ip *ip;
3264 u_int ip_version;
3265
3266 while (data != NULL && mbuf_len(data) < 1) {
3267 data = mbuf_next(data);
3268 }
3269
3270 if (data == NULL)
3271 return ENOENT;
3272
3273 ip = mtod(data, struct ip *);
3274 ip_version = ip->ip_v;
3275
3276 switch(ip_version) {
3277 case 4:
3278 *protocol = PF_INET;
3279 return 0;
3280 case 6:
3281 *protocol = PF_INET6;
3282 return 0;
3283 default:
3284 break;
3285 }
3286
3287 return 0;
3288}
3289
3290static errno_t
3291ipsec_add_proto(__unused ifnet_t interface,
3292 protocol_family_t protocol,
3293 __unused const struct ifnet_demux_desc *demux_array,
3294 __unused u_int32_t demux_count)
3295{
3296 switch(protocol) {
3297 case PF_INET:
3298 return 0;
3299 case PF_INET6:
3300 return 0;
3301 default:
3302 break;
3303 }
3304
3305 return ENOPROTOOPT;
3306}
3307
3308static errno_t
3309ipsec_del_proto(__unused ifnet_t interface,
3310 __unused protocol_family_t protocol)
3311{
3312 return 0;
3313}
3314
3315static errno_t
3316ipsec_ioctl(ifnet_t interface,
3317 u_long command,
3318 void *data)
3319{
3320 errno_t result = 0;
3321
3322 switch(command) {
3323 case SIOCSIFMTU: {
3324#if IPSEC_NEXUS
3325 struct ipsec_pcb *pcb = ifnet_softc(interface);
3326 if (pcb->ipsec_use_netif) {
3327 // Make sure we can fit packets in the channel buffers
3328 if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
3329 result = EINVAL;
3330 } else {
3331 ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
3332 }
3333 } else
3334#endif // IPSEC_NEXUS
3335 {
3336 ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
3337 }
3338 break;
3339 }
3340
3341 case SIOCSIFFLAGS:
3342 /* ifioctl() takes care of it */
3343 break;
3344
3345 default:
3346 result = EOPNOTSUPP;
3347 }
3348
3349 return result;
3350}
3351
3352static void
3353ipsec_detached(ifnet_t interface)
3354{
3355 struct ipsec_pcb *pcb = ifnet_softc(interface);
3356 (void)ifnet_release(interface);
3357 ipsec_free_pcb(pcb, true);
3358}
3359
3360/* Protocol Handlers */
3361
3362static errno_t
3363ipsec_proto_input(ifnet_t interface,
3364 protocol_family_t protocol,
3365 mbuf_t m,
3366 __unused char *frame_header)
3367{
3368 mbuf_pkthdr_setrcvif(m, interface);
3369
3370#if IPSEC_NEXUS
3371 struct ipsec_pcb *pcb = ifnet_softc(interface);
3372 if (!pcb->ipsec_use_netif)
3373#endif // IPSEC_NEXUS
3374 {
3375 uint32_t af = 0;
3376 struct ip *ip = mtod(m, struct ip *);
3377 if (ip->ip_v == 4) {
3378 af = AF_INET;
3379 } else if (ip->ip_v == 6) {
3380 af = AF_INET6;
3381 }
3382 bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
3383 pktap_input(interface, protocol, m, NULL);
3384 }
3385
3386 int32_t pktlen = m->m_pkthdr.len;
3387 if (proto_input(protocol, m) != 0) {
3388 ifnet_stat_increment_in(interface, 0, 0, 1);
3389 m_freem(m);
3390 } else {
3391 ifnet_stat_increment_in(interface, 1, pktlen, 0);
3392 }
3393
3394 return 0;
3395}
3396
3397static errno_t
3398ipsec_proto_pre_output(__unused ifnet_t interface,
3399 protocol_family_t protocol,
3400 __unused mbuf_t *packet,
3401 __unused const struct sockaddr *dest,
3402 __unused void *route,
3403 __unused char *frame_type,
3404 __unused char *link_layer_dest)
3405{
3406
3407 *(protocol_family_t *)(void *)frame_type = protocol;
3408 return 0;
3409}
3410
3411static errno_t
3412ipsec_attach_proto(ifnet_t interface,
3413 protocol_family_t protocol)
3414{
3415 struct ifnet_attach_proto_param proto;
3416 errno_t result;
3417
3418 bzero(&proto, sizeof(proto));
3419 proto.input = ipsec_proto_input;
3420 proto.pre_output = ipsec_proto_pre_output;
3421
3422 result = ifnet_attach_protocol(interface, protocol, &proto);
3423 if (result != 0 && result != EEXIST) {
3424 printf("ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
3425 protocol, result);
3426 }
3427
3428 return result;
3429}
3430
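/*
 * Inject a decrypted packet (or chain) back into the interface. With a
 * netif nexus the chain is appended to the pcb input chain under the chain
 * lock and the RX ring is notified so the nexus pulls it in; otherwise the
 * packet is classified with ipsec_demux() and delivered directly through
 * ipsec_proto_input().
 */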
3431errno_t
3432ipsec_inject_inbound_packet(ifnet_t interface,
3433 mbuf_t packet)
3434{
3435#if IPSEC_NEXUS
3436 struct ipsec_pcb *pcb = ifnet_softc(interface);
3437
3438 if (pcb->ipsec_use_netif) {
3439 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3440
3441 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
3442 if (pcb->ipsec_input_chain != NULL) {
3443 pcb->ipsec_input_chain_last->m_nextpkt = packet;
3444 } else {
3445 pcb->ipsec_input_chain = packet;
3446 }
3447 while (packet->m_nextpkt) {
3448 VERIFY(packet != packet->m_nextpkt);
3449 packet = packet->m_nextpkt;
3450 }
3451 pcb->ipsec_input_chain_last = packet;
3452 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
3453
3454 kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring;
3455 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3456
3457 if (rx_ring != NULL) {
3458 kern_channel_notify(rx_ring, 0);
3459 }
3460
3461 return (0);
3462 } else
3463#endif // IPSEC_NEXUS
3464 {
3465 errno_t error;
3466 protocol_family_t protocol;
3467 if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
3468 return error;
3469 }
3470
3471 return ipsec_proto_input(interface, protocol, packet, NULL);
3472 }
3473}
3474
3475void
3476ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family)
3477{
3478 if (packet != NULL && interface != NULL) {
3479 struct ipsec_pcb *pcb = ifnet_softc(interface);
3480 if (pcb != NULL) {
3481 /* Set traffic class, set flow */
3482 m_set_service_class(packet, pcb->ipsec_output_service_class);
3483 packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
3484 packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
3485 if (family == AF_INET) {
3486 struct ip *ip = mtod(packet, struct ip *);
3487 packet->m_pkthdr.pkt_proto = ip->ip_p;
3488 } else if (family == AF_INET6) {
3489 struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
3490 packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
3491 }
3492 packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
3493 }
3494 }
3495}
3496
3497void
3498ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
3499{
3500 struct ipsec_pcb *pcb;
3501
3502 if (interface == NULL || ipoa == NULL)
3503 return;
3504 pcb = ifnet_softc(interface);
3505
3506 if (net_qos_policy_restricted == 0) {
3507 ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
3508 ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
3509 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
3510 net_qos_policy_restrict_avapps != 0) {
3511 ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
3512 } else {
3513 ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
3514 ipoa->ipoa_sotc = SO_TC_VO;
3515 }
3516}
3517
3518void
3519ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
3520{
3521 struct ipsec_pcb *pcb;
3522
3523 if (interface == NULL || ip6oa == NULL)
3524 return;
3525 pcb = ifnet_softc(interface);
3526
3527 if (net_qos_policy_restricted == 0) {
3528 ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
3529 ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
3530 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
3531 net_qos_policy_restrict_avapps != 0) {
3532 ip6oa->ip6oa_flags &= ~IP6OAF_QOSMARKING_ALLOWED;
3533 } else {
3534 ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
3535 ip6oa->ip6oa_sotc = SO_TC_VO;
3536 }
3537}