]> git.saurik.com Git - apple/xnu.git/blob - bsd/net/if_ipsec.c
xnu-4903.270.47.tar.gz
[apple/xnu.git] / bsd / net / if_ipsec.c
1 /*
2 * Copyright (c) 2012-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29
30 #include <sys/systm.h>
31 #include <sys/kern_control.h>
32 #include <net/kpi_protocol.h>
33 #include <net/kpi_interface.h>
34 #include <sys/socket.h>
35 #include <sys/socketvar.h>
36 #include <net/if.h>
37 #include <net/if_types.h>
38 #include <net/bpf.h>
39 #include <net/if_ipsec.h>
40 #include <sys/mbuf.h>
41 #include <sys/sockio.h>
42 #include <netinet/in.h>
43 #include <netinet/ip6.h>
44 #include <netinet6/in6_var.h>
45 #include <netinet6/ip6_var.h>
46 #include <sys/kauth.h>
47 #include <netinet6/ipsec.h>
48 #include <netinet6/ipsec6.h>
49 #include <netinet6/esp.h>
50 #include <netinet6/esp6.h>
51 #include <netinet/ip.h>
52 #include <net/flowadv.h>
53 #include <net/necp.h>
54 #include <netkey/key.h>
55 #include <net/pktap.h>
56 #include <kern/zalloc.h>
57
58 #define IPSEC_NEXUS 0
59
60 extern int net_qos_policy_restricted;
61 extern int net_qos_policy_restrict_avapps;
62
63 /* Kernel Control functions */
64 static errno_t ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
65 void **unitinfo);
66 static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
67 void **unitinfo);
68 static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
69 void *unitinfo);
70 static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
71 void *unitinfo, mbuf_t m, int flags);
72 static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
73 int opt, void *data, size_t *len);
74 static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
75 int opt, void *data, size_t len);
76
77 /* Network Interface functions */
78 static void ipsec_start(ifnet_t interface);
79 static errno_t ipsec_output(ifnet_t interface, mbuf_t data);
80 static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
81 protocol_family_t *protocol);
82 static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
83 const struct ifnet_demux_desc *demux_array,
84 u_int32_t demux_count);
85 static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
86 static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
87 static void ipsec_detached(ifnet_t interface);
88
89 /* Protocol handlers */
90 static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
91 static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
92 mbuf_t m, char *frame_header);
93 static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
94 mbuf_t *packet, const struct sockaddr *dest, void *route,
95 char *frame_type, char *link_layer_dest);
96
97 static kern_ctl_ref ipsec_kctlref;
98 static u_int32_t ipsec_family;
99 static lck_attr_t *ipsec_lck_attr;
100 static lck_grp_attr_t *ipsec_lck_grp_attr;
101 static lck_grp_t *ipsec_lck_grp;
102 static lck_mtx_t ipsec_lock;
103
104 #if IPSEC_NEXUS
105
106 SYSCTL_DECL(_net_ipsec);
107 SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
108 static int if_ipsec_verify_interface_creation = 0;
109 SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");
110
111 #define IPSEC_IF_VERIFY(_e) if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }
112
113 #define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
114 #define IPSEC_IF_DEFAULT_RING_SIZE 64
115 #define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
116 #define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
117 #define IPSEC_IF_DEFAULT_BUF_SEG_SIZE skmem_usr_buf_seg_size
118
119 #define IPSEC_IF_MIN_RING_SIZE 16
120 #define IPSEC_IF_MAX_RING_SIZE 1024
121
122 #define IPSEC_IF_MIN_SLOT_SIZE 1024
123 #define IPSEC_IF_MAX_SLOT_SIZE 4096
124
125 static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
126 static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
127 static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
128
129 static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
130 static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
131 static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
132
133 SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
134 &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
135 SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
136 &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
137 SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
138 &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");
139
140 static errno_t
141 ipsec_register_nexus(void);
142
/*
 * Per-pcb collection of nexus/channel identifiers used when the interface
 * runs in IPSEC_NEXUS mode. All members are opaque UUID handles returned
 * by the kern_nexus controller APIs.
 */
typedef struct ipsec_nx {
	uuid_t if_provider;   // net-if nexus provider
	uuid_t if_instance;   // net-if nexus instance
	uuid_t ms_provider;   // presumably the multistack/flowswitch provider — TODO confirm
	uuid_t ms_instance;   // flowswitch instance
	uuid_t ms_device;     // flowswitch device port
	uuid_t ms_host;       // flowswitch host port
	uuid_t ms_agent;      // netagent registered for this interface
} *ipsec_nx_t;
152
153 static nexus_controller_t ipsec_ncd;
154 static int ipsec_ncd_refcount;
155 static uuid_t ipsec_kpipe_uuid;
156
157 #endif // IPSEC_NEXUS
158
/* Control block allocated for each kernel control connection */
struct ipsec_pcb {
	TAILQ_ENTRY(ipsec_pcb) ipsec_chain;       // linkage on the global ipsec_head list
	kern_ctl_ref ipsec_ctlref;                // kernel control this pcb belongs to
	ifnet_t ipsec_ifp;                        // attached network interface (NULL until created)
	u_int32_t ipsec_unit;                     // control unit; 0 after ctl disconnect
	u_int32_t ipsec_unique_id;
	u_int32_t ipsec_flags;
	u_int32_t ipsec_input_frag_size;          // reassembly threshold for input fragments
	bool ipsec_frag_size_set;                 // true once ipsec_input_frag_size was configured
	int ipsec_ext_ifdata_stats;               // nonzero: external entity maintains ifnet stats
	mbuf_svc_class_t ipsec_output_service_class;
	char ipsec_if_xname[IFXNAMSIZ];           // interface external name
	char ipsec_unique_name[IFXNAMSIZ];
	// PCB lock protects state fields, like ipsec_kpipe_enabled
	decl_lck_rw_data(, ipsec_pcb_lock);

#if IPSEC_NEXUS
	// Input chain lock protects the list of input mbufs
	// The input chain lock must be taken AFTER the PCB lock if both are held
	lck_mtx_t ipsec_input_chain_lock;
	struct mbuf * ipsec_input_chain;          // head of pending input mbuf chain
	struct mbuf * ipsec_input_chain_last;     // tail, for O(1) append
	struct ipsec_nx ipsec_nx;                 // nexus/channel UUIDs (see struct ipsec_nx)
	int ipsec_kpipe_enabled;                  // kernel-pipe channel active (guarded by pcb lock)
	uuid_t ipsec_kpipe_uuid;
	void * ipsec_kpipe_rxring;                // cached kpipe ring pointers set in ring_init
	void * ipsec_kpipe_txring;
	kern_pbufpool_t ipsec_kpipe_pp;

	kern_nexus_t ipsec_netif_nexus;           // net-if nexus, set in ipsec_netif_prepare
	kern_pbufpool_t ipsec_netif_pp;
	void * ipsec_netif_rxring;                // cached net-if ring pointers set in ring_init
	void * ipsec_netif_txring;
	uint64_t ipsec_netif_txring_size;

	u_int32_t ipsec_slot_size;                // per-slot buffer size; caps copied packet length
	u_int32_t ipsec_netif_ring_size;
	u_int32_t ipsec_tx_fsw_ring_size;
	u_int32_t ipsec_rx_fsw_ring_size;
	bool ipsec_use_netif;                     // true when using the nexus data path
	bool ipsec_needs_netagent;
#endif // IPSEC_NEXUS
};
203
204 TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;
205
206 #define IPSEC_PCB_ZONE_MAX 32
207 #define IPSEC_PCB_ZONE_NAME "net.if_ipsec"
208
209 static unsigned int ipsec_pcb_size; /* size of zone element */
210 static struct zone *ipsec_pcb_zone; /* zone for ipsec_pcb */
211
212 #define IPSECQ_MAXLEN 256
213
214 #if IPSEC_NEXUS
215 static int
216 sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
217 {
218 #pragma unused(arg1, arg2)
219 int value = if_ipsec_ring_size;
220
221 int error = sysctl_handle_int(oidp, &value, 0, req);
222 if (error || !req->newptr) {
223 return error;
224 }
225
226 if (value < IPSEC_IF_MIN_RING_SIZE ||
227 value > IPSEC_IF_MAX_RING_SIZE) {
228 return EINVAL;
229 }
230
231 if_ipsec_ring_size = value;
232
233 return 0;
234 }
235
236 static int
237 sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
238 {
239 #pragma unused(arg1, arg2)
240 int value = if_ipsec_tx_fsw_ring_size;
241
242 int error = sysctl_handle_int(oidp, &value, 0, req);
243 if (error || !req->newptr) {
244 return error;
245 }
246
247 if (value < IPSEC_IF_MIN_RING_SIZE ||
248 value > IPSEC_IF_MAX_RING_SIZE) {
249 return EINVAL;
250 }
251
252 if_ipsec_tx_fsw_ring_size = value;
253
254 return 0;
255 }
256
257 static int
258 sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
259 {
260 #pragma unused(arg1, arg2)
261 int value = if_ipsec_rx_fsw_ring_size;
262
263 int error = sysctl_handle_int(oidp, &value, 0, req);
264 if (error || !req->newptr) {
265 return error;
266 }
267
268 if (value < IPSEC_IF_MIN_RING_SIZE ||
269 value > IPSEC_IF_MAX_RING_SIZE) {
270 return EINVAL;
271 }
272
273 if_ipsec_rx_fsw_ring_size = value;
274
275 return 0;
276 }
277 #endif // IPSEC_NEXUS
278
279 errno_t
280 ipsec_register_control(void)
281 {
282 struct kern_ctl_reg kern_ctl;
283 errno_t result = 0;
284
285 /* Find a unique value for our interface family */
286 result = mbuf_tag_id_find(IPSEC_CONTROL_NAME, &ipsec_family);
287 if (result != 0) {
288 printf("ipsec_register_control - mbuf_tag_id_find_internal failed: %d\n", result);
289 return result;
290 }
291
292 ipsec_pcb_size = sizeof(struct ipsec_pcb);
293 ipsec_pcb_zone = zinit(ipsec_pcb_size,
294 IPSEC_PCB_ZONE_MAX * ipsec_pcb_size,
295 0, IPSEC_PCB_ZONE_NAME);
296 if (ipsec_pcb_zone == NULL) {
297 printf("ipsec_register_control - zinit(ipsec_pcb) failed");
298 return ENOMEM;
299 }
300
301 #if IPSEC_NEXUS
302 ipsec_register_nexus();
303 #endif // IPSEC_NEXUS
304
305 TAILQ_INIT(&ipsec_head);
306
307 bzero(&kern_ctl, sizeof(kern_ctl));
308 strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
309 kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
310 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; /* Require root */
311 kern_ctl.ctl_sendsize = 64 * 1024;
312 kern_ctl.ctl_recvsize = 64 * 1024;
313 kern_ctl.ctl_bind = ipsec_ctl_bind;
314 kern_ctl.ctl_connect = ipsec_ctl_connect;
315 kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
316 kern_ctl.ctl_send = ipsec_ctl_send;
317 kern_ctl.ctl_setopt = ipsec_ctl_setopt;
318 kern_ctl.ctl_getopt = ipsec_ctl_getopt;
319
320 result = ctl_register(&kern_ctl, &ipsec_kctlref);
321 if (result != 0) {
322 printf("ipsec_register_control - ctl_register failed: %d\n", result);
323 return result;
324 }
325
326 /* Register the protocol plumbers */
327 if ((result = proto_register_plumber(PF_INET, ipsec_family,
328 ipsec_attach_proto, NULL)) != 0) {
329 printf("ipsec_register_control - proto_register_plumber(PF_INET, %d) failed: %d\n",
330 ipsec_family, result);
331 ctl_deregister(ipsec_kctlref);
332 return result;
333 }
334
335 /* Register the protocol plumbers */
336 if ((result = proto_register_plumber(PF_INET6, ipsec_family,
337 ipsec_attach_proto, NULL)) != 0) {
338 proto_unregister_plumber(PF_INET, ipsec_family);
339 ctl_deregister(ipsec_kctlref);
340 printf("ipsec_register_control - proto_register_plumber(PF_INET6, %d) failed: %d\n",
341 ipsec_family, result);
342 return result;
343 }
344
345 ipsec_lck_attr = lck_attr_alloc_init();
346 ipsec_lck_grp_attr = lck_grp_attr_alloc_init();
347 ipsec_lck_grp = lck_grp_alloc_init("ipsec", ipsec_lck_grp_attr);
348 lck_mtx_init(&ipsec_lock, ipsec_lck_grp, ipsec_lck_attr);
349
350 return 0;
351 }
352
353 /* Helpers */
354 int
355 ipsec_interface_isvalid(ifnet_t interface)
356 {
357 struct ipsec_pcb *pcb = NULL;
358
359 if (interface == NULL) {
360 return 0;
361 }
362
363 pcb = ifnet_softc(interface);
364
365 if (pcb == NULL) {
366 return 0;
367 }
368
369 /* When ctl disconnects, ipsec_unit is set to 0 */
370 if (pcb->ipsec_unit == 0) {
371 return 0;
372 }
373
374 return 1;
375 }
376
377 #if IPSEC_NEXUS
378 boolean_t
379 ipsec_interface_needs_netagent(ifnet_t interface)
380 {
381 struct ipsec_pcb *pcb = NULL;
382
383 if (interface == NULL) {
384 return FALSE;
385 }
386
387 pcb = ifnet_softc(interface);
388
389 if (pcb == NULL) {
390 return FALSE;
391 }
392
393 return pcb->ipsec_needs_netagent == true;
394 }
395 #endif // IPSEC_NEXUS
396
/*
 * Apply the default attributes to a freshly created (or recycled) ipsec
 * ifnet: MTU, interface flags, and extended flags. Always returns 0.
 */
static errno_t
ipsec_ifnet_set_attrs(ifnet_t ifp)
{
	/* Set flags and additional information. */
	ifnet_set_mtu(ifp, 1500);
	ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);

	/* The interface must generate its own IPv6 LinkLocal address,
	 * if possible following the recommendation of RFC2472 to the 64bit interface ID
	 */
	ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);

#if !IPSEC_NEXUS
	/* Reset the stats in case as the interface may have been recycled */
	struct ifnet_stats_param stats;
	bzero(&stats, sizeof(struct ifnet_stats_param));
	ifnet_set_stat(ifp, &stats);
#endif // !IPSEC_NEXUS

	return 0;
}
418
419 #if IPSEC_NEXUS
420
421 static uuid_t ipsec_nx_dom_prov;
422
/* Domain-provider init callback; no per-provider setup is needed. */
static errno_t
ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
{
	return 0;
}
428
/* Domain-provider teardown callback; nothing to release. */
static void
ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
{
	// Ignore
}
434
435 static errno_t
436 ipsec_register_nexus(void)
437 {
438 const struct kern_nexus_domain_provider_init dp_init = {
439 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
440 .nxdpi_flags = 0,
441 .nxdpi_init = ipsec_nxdp_init,
442 .nxdpi_fini = ipsec_nxdp_fini
443 };
444 errno_t err = 0;
445
446 /* ipsec_nxdp_init() is called before this function returns */
447 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
448 (const uint8_t *) "com.apple.ipsec",
449 &dp_init, sizeof(dp_init),
450 &ipsec_nx_dom_prov);
451 if (err != 0) {
452 printf("%s: failed to register domain provider\n", __func__);
453 return err;
454 }
455 return 0;
456 }
457
/*
 * Net-if nexus prepare callback: remember the nexus on the pcb and apply
 * the standard ipsec interface attributes to the ifnet.
 */
static errno_t
ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
{
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	pcb->ipsec_netif_nexus = nexus;
	return ipsec_ifnet_set_attrs(ifp);
}
465
/* Channel pre-connect callback: no per-channel context; always succeeds. */
static errno_t
ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
    proc_t p, kern_nexus_t nexus,
    nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
{
#pragma unused(nxprov, p)
#pragma unused(nexus, nexus_port, channel, ch_ctx)
	return 0;
}
475
476 static errno_t
477 ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
478 kern_channel_t channel)
479 {
480 #pragma unused(nxprov, channel)
481 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
482 boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
483 return ok ? 0 : ENXIO;
484 }
485
/* Channel pre-disconnect callback: nothing to tear down early. */
static void
ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, nexus, channel)
}
492
/* Net-if channel pre-disconnect callback: nothing to tear down early. */
static void
ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, nexus, channel)
}
499
/*
 * Channel disconnected callback: clear the pcb's nexus pointer (if it is
 * still this nexus) and drop the io refcount taken in
 * ipsec_nexus_connected().
 */
static void
ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	if (pcb->ipsec_netif_nexus == nexus) {
		pcb->ipsec_netif_nexus = NULL;
	}
	ifnet_decr_iorefcnt(pcb->ipsec_ifp);
}
511
512 static errno_t
513 ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
514 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
515 void **ring_ctx)
516 {
517 #pragma unused(nxprov)
518 #pragma unused(channel)
519 #pragma unused(ring_ctx)
520 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
521 if (!is_tx_ring) {
522 VERIFY(pcb->ipsec_kpipe_rxring == NULL);
523 pcb->ipsec_kpipe_rxring = ring;
524 } else {
525 VERIFY(pcb->ipsec_kpipe_txring == NULL);
526 pcb->ipsec_kpipe_txring = ring;
527 }
528 return 0;
529 }
530
531 static void
532 ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
533 kern_channel_ring_t ring)
534 {
535 #pragma unused(nxprov)
536 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
537 if (pcb->ipsec_kpipe_rxring == ring) {
538 pcb->ipsec_kpipe_rxring = NULL;
539 } else if (pcb->ipsec_kpipe_txring == ring) {
540 pcb->ipsec_kpipe_txring = NULL;
541 }
542 }
543
/*
 * Kernel-pipe TX sync: the user-space side has queued packets on the kpipe
 * TX ring. This routine does not move data itself; it only notifies the
 * net-if RX ring, whose sync will pull from this ring.
 * Takes the pcb lock shared; drops it before kern_channel_notify().
 */
static errno_t
ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	// ipsec_kpipe_enabled is protected by the pcb lock
	int channel_enabled = pcb->ipsec_kpipe_enabled;
	if (!channel_enabled) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		return 0;
	}

	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	if (tx_slot == NULL) {
		// Nothing to write, bail
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		return 0;
	}

	// Signal the netif ring to read
	kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring;
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	if (rx_ring != NULL) {
		kern_channel_notify(rx_ring, 0);
	}
	return 0;
}
575
/*
 * Run an outbound mbuf through IPsec encryption for this interface.
 * Dispatches on the IP version found in the packet's first header.
 *
 * Returns the encrypted mbuf (ownership transfers to the caller), or NULL
 * when the packet was consumed, dropped, or tunneled into the other
 * address family. On any error path the mbuf is freed here.
 *
 * NOTE(review): when ipsec4/6_interface_output() reports the packet was
 * tunneled into the other family, the mbuf is intentionally not returned
 * (see the in-code TODOs) — the packet is gone from this path.
 */
static mbuf_t
ipsec_encrypt_mbuf(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_output_state ipsec_state;
	int error = 0;
	uint32_t af;

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = -1;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	// assumes the IP header is contiguous in the first mbuf — TODO confirm callers guarantee this
	struct ip *ip = mtod(data, struct ip *);
	u_int ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
		af = AF_INET;

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec4_interface_output(&ipsec_state, interface);
		if (error == 0 && ipsec_state.tunneled == 6) {
			// Tunneled in IPv6 - packet is gone
			// TODO: Don't lose mbuf
			data = NULL;
			goto done;
		}

		// ipsec4_interface_output may have replaced the mbuf
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				printf("ipsec_encrypt_mbuf: ipsec4_output error %d\n", error);
			}
			goto ipsec_output_err;
		}
		goto done;
	}
	case 6: {
		af = AF_INET6;

		// Split the IPv6 header into its own mbuf before processing
		data = ipsec6_splithdr(data);
		if (data == NULL) {
			printf("ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
			goto ipsec_output_err;
		}

		struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) {
			// Tunneled in IPv4 - packet is gone
			// TODO: Don't lose mbuf
			data = NULL;
			goto done;
		}
		// ipsec6_interface_output may have replaced the mbuf
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				printf("ipsec_encrypt_mbuf: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}
		goto done;
	}
	default: {
		printf("ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
		error = -1;
		goto ipsec_output_err;
	}
	}

done:
	return data;

ipsec_output_err:
	if (data) {
		mbuf_freem(data);
	}
	return NULL;
}
670
/*
 * Kernel-pipe RX sync: drain packets queued on the net-if TX ring (outbound
 * from the BSD stack), encrypt each one via ipsec_encrypt_mbuf(), and
 * deliver the ciphertext into the kpipe RX ring for the user-space reader.
 *
 * Locking protocol (order matters):
 *   1. pcb lock (shared) to validate state and fetch the net-if TX ring,
 *   2. drop pcb lock, kr_enter() the TX ring,
 *   3. retake pcb lock and re-validate the ring pointer (it may have been
 *      torn down while unlocked),
 *   4. on exit: unlock pcb FIRST, then kr_exit().
 */
static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	struct kern_channel_ring_stat_increment rx_ring_stats;

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	int channel_enabled = pcb->ipsec_kpipe_enabled;
	if (!channel_enabled) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		return 0;
	}

	// Reclaim user-released slots
	(void) kern_channel_reclaim(rx_ring);

	uint32_t avail = kern_channel_available_slot_count(rx_ring);
	if (avail == 0) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		return 0;
	}

	kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring;
	if (tx_ring == NULL) {
		// Net-If TX ring not set up yet, nothing to read
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		return 0;
	}

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;

	// Unlock ipsec before entering ring
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	(void)kr_enter(tx_ring, TRUE);

	// Lock again after entering and validate
	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	if (tx_ring != pcb->ipsec_netif_txring) {
		// Ring no longer valid
		// Unlock first, then exit ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(tx_ring);
		return 0;
	}


	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	if (tx_slot == NULL) {
		// Nothing to read, don't bother signalling
		// Unlock first, then exit ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(tx_ring);
		return 0;
	}

	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
	VERIFY(rx_pp != NULL);
	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
	kern_channel_slot_t rx_pslot = NULL;
	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	// Move packets until either ring runs out of slots
	while (rx_slot != NULL && tx_slot != NULL) {
		size_t length = 0;
		mbuf_t data = NULL;
		errno_t error = 0;

		// Allocate rx packet
		kern_packet_t rx_ph = 0;
		error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
		if (__improbable(error != 0)) {
			printf("ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
			    pcb->ipsec_ifp->if_xname);
			break;
		}

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		// Empty TX slot: release the rx packet and keep going
		if (tx_ph == 0) {
			kern_pbufpool_free(rx_pp, rx_ph);
			continue;
		}

		// Locate the TX payload bytes
		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);
		uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
		VERIFY(tx_baddr != NULL);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		// Tap the outbound (cleartext) packet for BPF listeners
		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		// Cap the copy at the configured slot size
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		// Increment TX stats
		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;

		if (length > 0) {
			// Copy the payload into an mbuf so the IPsec code can process it
			error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					// Encrypt and send packet
					data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
				} else {
					printf("ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROPPED);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				printf("ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROPPED);
			}
		} else {
			printf("ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROPPED);
		}

		// No ciphertext to deliver (encryption failed or packet dropped)
		if (data == NULL) {
			printf("ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
			kern_pbufpool_free(rx_pp, rx_ph);
			break;
		}

		length = mbuf_pkthdr_len(data);
		if (length > rx_pp->pp_buflet_size) {
			// Flush data
			mbuf_freem(data);
			kern_pbufpool_free(rx_pp, rx_ph);
			printf("ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
			    pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
			continue;
		}

		// Fillout rx packet
		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
		VERIFY(rx_buf != NULL);
		void *rx_baddr = kern_buflet_get_object_address(rx_buf);
		VERIFY(rx_baddr != NULL);

		// Copy-in data from mbuf to buflet
		mbuf_copydata(data, 0, length, (void *)rx_baddr);
		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id

		// Finalize and attach the packet
		error = kern_buflet_set_data_offset(rx_buf, 0);
		VERIFY(error == 0);
		error = kern_buflet_set_data_length(rx_buf, length);
		VERIFY(error == 0);
		error = kern_packet_finalize(rx_ph);
		VERIFY(error == 0);
		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
		VERIFY(error == 0);

		STATS_INC(nifs, NETIF_STATS_TXPKTS);
		STATS_INC(nifs, NETIF_STATS_TXCOPY_DIRECT);

		rx_ring_stats.kcrsi_slots_transferred++;
		rx_ring_stats.kcrsi_bytes_transferred += length;

		if (!pcb->ipsec_ext_ifdata_stats) {
			ifnet_stat_increment_out(pcb->ipsec_ifp, 1, length, 0);
		}

		mbuf_freem(data);

		rx_pslot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	// Publish progress and stats on whichever rings we advanced
	if (rx_pslot) {
		kern_channel_advance_slot(rx_ring, rx_pslot);
		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
	}

	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	/* always reenable output */
	errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
	if (error != 0) {
		printf("ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
	}

	// Unlock first, then exit ring
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	if (tx_pslot != NULL) {
		kern_channel_notify(tx_ring, 0);
	}
	kr_exit(tx_ring);

	return 0;
}
884
885 static errno_t
886 ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
887 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
888 void **ring_ctx)
889 {
890 #pragma unused(nxprov)
891 #pragma unused(channel)
892 #pragma unused(ring_ctx)
893 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
894 if (!is_tx_ring) {
895 VERIFY(pcb->ipsec_netif_rxring == NULL);
896 pcb->ipsec_netif_rxring = ring;
897 } else {
898 VERIFY(pcb->ipsec_netif_txring == NULL);
899 pcb->ipsec_netif_txring = ring;
900 }
901 return 0;
902 }
903
904 static void
905 ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
906 kern_channel_ring_t ring)
907 {
908 #pragma unused(nxprov)
909 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
910 if (pcb->ipsec_netif_rxring == ring) {
911 pcb->ipsec_netif_rxring = NULL;
912 } else if (pcb->ipsec_netif_txring == ring) {
913 pcb->ipsec_netif_txring = NULL;
914 }
915 }
916
/*
 * Evaluate NECP output policy for a packet injected from the net-if TX
 * ring. Returns true when the packet may proceed (and marks it with the
 * matched policy id), false when it should be dropped.
 */
static bool
ipsec_netif_check_policy(mbuf_t data)
{
	necp_kernel_policy_result necp_result = 0;
	necp_kernel_policy_result_parameter necp_result_parameter = {};
	uint32_t necp_matched_policy_id = 0;

	// This packet has been marked with IP level policy, do not mark again.
	if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
		return true;
	}

	// NOTE(review): the gate below uses sizeof(struct ip) even for the v6
	// case; the v6 header check presumably happens in the policy-match
	// call — confirm before relying on it.
	size_t length = mbuf_pkthdr_len(data);
	if (length < sizeof(struct ip)) {
		return false;
	}

	// assumes the IP header is contiguous in the first mbuf — TODO confirm callers guarantee this
	struct ip *ip = mtod(data, struct ip *);
	u_int ip_version = ip->ip_v;
	switch (ip_version) {
	case 4: {
		necp_matched_policy_id = necp_ip_output_find_policy_match(data, 0, NULL,
		    &necp_result, &necp_result_parameter);
		break;
	}
	case 6: {
		necp_matched_policy_id = necp_ip6_output_find_policy_match(data, 0, NULL,
		    &necp_result, &necp_result_parameter);
		break;
	}
	default: {
		// Unknown IP version: reject
		return false;
	}
	}

	if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
	    necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
		/* Drop and flow divert packets should be blocked at the IP layer */
		return false;
	}

	// Record the matched policy so downstream code does not re-evaluate
	necp_mark_packet_from_ip(data, necp_matched_policy_id);
	return true;
}
961
/*
 * Net-if TX sync: the BSD stack (via the flowswitch) has queued outbound
 * packets on the net-if TX ring. If a kernel pipe is enabled, just signal
 * the kpipe RX ring, which will drain this ring in ipsec_kpipe_sync_rx().
 * Otherwise copy each packet into an mbuf, run NECP policy, and hand it to
 * ipsec_output() for in-kernel encryption.
 * Holds the pcb lock shared for the whole injection loop.
 */
static errno_t
ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);

	STATS_INC(nifs, NETIF_STATS_TXSYNC);

	if (tx_slot == NULL) {
		// Nothing to write, don't bother signalling
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		return 0;
	}

	if (pcb->ipsec_kpipe_enabled) {
		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring;
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}
		return 0;
	}

	// If we're here, we're injecting into the BSD stack
	while (tx_slot != NULL) {
		size_t length = 0;
		mbuf_t data = NULL;

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		// Skip empty slots
		if (tx_ph == 0) {
			continue;
		}

		// Locate the TX payload bytes
		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);
		uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
		VERIFY(tx_baddr != 0);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		// Tap the outbound (cleartext) packet for BPF listeners
		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		// Cap the copy at the configured slot size
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		if (length > 0) {
			// Copy payload into an mbuf for the BSD IPsec path
			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					// Mark packet from policy
					uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
					necp_mark_packet_from_ip(data, policy_id);

					// Check policy with NECP
					if (!ipsec_netif_check_policy(data)) {
						printf("ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
						STATS_INC(nifs, NETIF_STATS_DROPPED);
						mbuf_freem(data);
						data = NULL;
					} else {
						// Send through encryption
						// ipsec_output consumes the mbuf regardless of outcome
						error = ipsec_output(pcb->ipsec_ifp, data);
						if (error != 0) {
							printf("ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
						}
					}
				} else {
					printf("ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROPPED);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				printf("ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROPPED);
			}
		} else {
			printf("ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROPPED);
		}

		// A NULL mbuf here means the packet was dropped before injection
		if (data == NULL) {
			printf("ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
			break;
		}

		STATS_INC(nifs, NETIF_STATS_TXPKTS);
		STATS_INC(nifs, NETIF_STATS_TXCOPY_MBUF);

		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;
	}

	// Publish progress and stats for the slots we consumed
	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	return 0;
}
1086
/*
 * ipsec_netif_tx_doorbell - netif TX doorbell callback for the ipsec nexus.
 *
 * Refills/syncs the TX ring, then, when the kernel pipe is enabled, applies
 * flow control (disables ifnet output when the ring has no room) and kicks
 * the kpipe RX ring so the peer reads the queued packets.
 *
 * Returns 0 unconditionally; refill errors other than EAGAIN/EBUSY are only
 * logged.
 */
static errno_t
ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, __unused uint32_t flags)
{
#pragma unused(nxprov)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	boolean_t more = false;
	errno_t rc = 0;

	/*
	 * Refill and sync the ring; we may be racing against another thread doing
	 * an RX sync that also wants to do kr_enter(), and so use the blocking
	 * variant here.
	 */
	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
		// EAGAIN/EBUSY are expected transient outcomes; only log real failures
		printf("%s, tx refill failed %d\n", __func__, rc);
	}

	// Serialize against other ring users, then take the pcb lock shared
	(void) kr_enter(ring, TRUE);
	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	if (pcb->ipsec_kpipe_enabled) {
		uint32_t tx_available = kern_channel_available_slot_count(ring);
		if (pcb->ipsec_netif_txring_size > 0 &&
		    tx_available >= pcb->ipsec_netif_txring_size - 1) {
			// No room left in tx ring, disable output for now
			errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
			if (error != 0) {
				printf("ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
			}
		}
	}

	if (pcb->ipsec_kpipe_enabled) {
		// Snapshot the ring pointer under the lock before dropping it
		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring;

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}
	} else {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	}

	kr_exit(ring);

	return 0;
}
1138
/*
 * ipsec_netif_sync_rx - netif RX sync callback for the ipsec nexus.
 *
 * Fills the netif RX ring from two producers, in order:
 *   1. The legacy input chain (pcb->ipsec_input_chain): already-decrypted
 *      mbufs queued by the BSD-stack path.  Oversized packets are fragmented
 *      (IPv4 via ip_fragment, IPv6 via ip6_do_fragmentation) and the
 *      fragments are re-queued onto the chain tail for a later pass.
 *   2. The kernel pipe TX ring (pcb->ipsec_kpipe_txring): encrypted packets
 *      written by the userspace peer, decrypted inline here via
 *      esp4_input_extended / esp6_input_extended.
 *
 * Locking: pcb->ipsec_pcb_lock is held shared throughout, except that it is
 * dropped and re-taken around kr_enter() on the kpipe TX ring (after which
 * the ring pointer is re-validated against the pcb).  The input chain is
 * protected by pcb->ipsec_input_chain_lock.
 *
 * Returns 0 unconditionally; per-packet failures bump netif stats and drop.
 */
static errno_t
ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	struct kern_channel_ring_stat_increment rx_ring_stats;

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	// Reclaim user-released slots
	(void) kern_channel_reclaim(rx_ring);

	STATS_INC(nifs, NETIF_STATS_RXSYNC);

	uint32_t avail = kern_channel_available_slot_count(rx_ring);
	if (avail == 0) {
		// No space in the RX ring; nothing can be delivered this pass
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		return 0;
	}

	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
	VERIFY(rx_pp != NULL);
	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
	kern_channel_slot_t rx_pslot = NULL;
	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	// Pass 1: drain the legacy (BSD-path) input chain into the RX ring
	while (rx_slot != NULL) {
		// Check for a waiting packet
		lck_mtx_lock(&pcb->ipsec_input_chain_lock);
		mbuf_t data = pcb->ipsec_input_chain;
		if (data == NULL) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			break;
		}

		// Allocate rx packet before dequeueing, so the chain is untouched on failure
		kern_packet_t rx_ph = 0;
		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
		if (__improbable(error != 0)) {
			STATS_INC(nifs, NETIF_STATS_NOMEM_PKT);
			STATS_INC(nifs, NETIF_STATS_DROPPED);
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			break;
		}

		// Advance waiting packets
		pcb->ipsec_input_chain = data->m_nextpkt;
		data->m_nextpkt = NULL;
		if (pcb->ipsec_input_chain == NULL) {
			pcb->ipsec_input_chain_last = NULL;
		}
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		size_t length = mbuf_pkthdr_len(data);

		if (length < sizeof(struct ip)) {
			// Too short to hold even an IPv4 header; drop it
			mbuf_freem(data);
			kern_pbufpool_free(rx_pp, rx_ph);
			STATS_INC(nifs, NETIF_STATS_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROPPED);
			printf("ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
			    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
			continue;
		}

		// Classify address family from the IP version nibble
		uint32_t af = 0;
		struct ip *ip = mtod(data, struct ip *);
		u_int ip_version = ip->ip_v;
		switch (ip_version) {
		case 4: {
			af = AF_INET;
			break;
		}
		case 6: {
			af = AF_INET6;
			break;
		}
		default: {
			// af stays 0; the fragmentation switch below drops it
			printf("ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
			    pcb->ipsec_ifp->if_xname, ip_version);
			break;
		}
		}

		if (length > rx_pp->pp_buflet_size ||
		    (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
			// We need to fragment to send up into the netif

			// Fragment MTU: the smaller of the buflet size and the configured frag size
			u_int32_t fragment_mtu = rx_pp->pp_buflet_size;
			if (pcb->ipsec_frag_size_set &&
			    pcb->ipsec_input_frag_size < rx_pp->pp_buflet_size) {
				fragment_mtu = pcb->ipsec_input_frag_size;
			}

			mbuf_t fragment_chain = NULL;
			switch (af) {
			case AF_INET: {
				// ip_fragment expects the length in host order
				ip->ip_len = ntohs(ip->ip_len);

				// ip_fragment will modify the original data, don't free
				int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
				if (fragment_error == 0 && data != NULL) {
					fragment_chain = data;
				} else {
					STATS_INC(nifs, NETIF_STATS_BADLEN);
					STATS_INC(nifs, NETIF_STATS_DROPPED);
					printf("ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
					    pcb->ipsec_ifp->if_xname, length, fragment_error);
				}
				break;
			}
			case AF_INET6: {
				if (length < sizeof(struct ip6_hdr)) {
					mbuf_freem(data);
					STATS_INC(nifs, NETIF_STATS_BADLEN);
					STATS_INC(nifs, NETIF_STATS_DROPPED);
					printf("ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
					    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
				} else {
					// ip6_do_fragmentation will free the original data on success only
					struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
					struct ip6_exthdrs exthdrs;
					memset(&exthdrs, 0, sizeof(exthdrs));

					int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
					    ip6, &exthdrs, fragment_mtu, ip6->ip6_nxt);
					if (fragment_error == 0 && data != NULL) {
						fragment_chain = data;
					} else {
						// Failure path: the mbuf is still ours to free
						mbuf_freem(data);
						STATS_INC(nifs, NETIF_STATS_BADLEN);
						STATS_INC(nifs, NETIF_STATS_DROPPED);
						printf("ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
						    pcb->ipsec_ifp->if_xname, length, fragment_error);
					}
				}
				break;
			}
			default: {
				// Cannot fragment unknown families
				mbuf_freem(data);
				STATS_INC(nifs, NETIF_STATS_BADLEN);
				STATS_INC(nifs, NETIF_STATS_DROPPED);
				printf("ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
				    pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
				break;
			}
			}

			if (fragment_chain != NULL) {
				// Add fragments to chain before continuing
				lck_mtx_lock(&pcb->ipsec_input_chain_lock);
				if (pcb->ipsec_input_chain != NULL) {
					pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
				} else {
					pcb->ipsec_input_chain = fragment_chain;
				}
				// Walk to the new tail so _last stays accurate
				while (fragment_chain->m_nextpkt) {
					VERIFY(fragment_chain != fragment_chain->m_nextpkt);
					fragment_chain = fragment_chain->m_nextpkt;
				}
				pcb->ipsec_input_chain_last = fragment_chain;
				lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			}

			// Make sure to free unused rx packet
			kern_pbufpool_free(rx_pp, rx_ph);

			continue;
		}

		mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);

		// Fillout rx packet
		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
		VERIFY(rx_buf != NULL);
		void *rx_baddr = kern_buflet_get_object_address(rx_buf);
		VERIFY(rx_baddr != NULL);

		// Copy-in data from mbuf to buflet
		mbuf_copydata(data, 0, length, (void *)rx_baddr);
		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id

		// Finalize and attach the packet
		error = kern_buflet_set_data_offset(rx_buf, 0);
		VERIFY(error == 0);
		error = kern_buflet_set_data_length(rx_buf, length);
		VERIFY(error == 0);
		error = kern_packet_set_link_header_offset(rx_ph, 0);
		VERIFY(error == 0);
		error = kern_packet_set_network_header_offset(rx_ph, 0);
		VERIFY(error == 0);
		error = kern_packet_finalize(rx_ph);
		VERIFY(error == 0);
		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
		VERIFY(error == 0);

		STATS_INC(nifs, NETIF_STATS_RXPKTS);
		STATS_INC(nifs, NETIF_STATS_RXCOPY_MBUF);
		bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

		rx_ring_stats.kcrsi_slots_transferred++;
		rx_ring_stats.kcrsi_bytes_transferred += length;

		if (!pcb->ipsec_ext_ifdata_stats) {
			ifnet_stat_increment_in(pcb->ipsec_ifp, 1, length, 0);
		}

		// Packet data has been copied out; release the mbuf
		mbuf_freem(data);

		// Advance ring
		rx_pslot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	// Pass 2: decrypt packets from the kernel pipe TX ring, if present
	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring;
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = NULL;
	if (tx_ring == NULL) {
		// Net-If TX ring not set up yet, nothing to read
		goto done;
	}


	// Unlock ipsec before entering ring
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	(void)kr_enter(tx_ring, TRUE);

	// Lock again after entering and validate
	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	if (tx_ring != pcb->ipsec_kpipe_txring) {
		// Ring was swapped out while the lock was dropped; bail
		goto done;
	}

	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	if (tx_slot == NULL) {
		// Nothing to read, don't bother signalling
		goto done;
	}

	while (rx_slot != NULL && tx_slot != NULL) {
		size_t length = 0;
		mbuf_t data = NULL;
		errno_t error = 0;
		uint32_t af;

		// Allocate rx packet
		kern_packet_t rx_ph = 0;
		error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
		if (__improbable(error != 0)) {
			STATS_INC(nifs, NETIF_STATS_NOMEM_PKT);
			STATS_INC(nifs, NETIF_STATS_DROPPED);
			break;
		}

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		if (tx_ph == 0) {
			// Empty slot; release the RX packet we reserved for it
			kern_pbufpool_free(rx_pp, rx_ph);
			continue;
		}

		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);
		uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
		VERIFY(tx_baddr != 0);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		// Clamp to the configured slot size
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		// Increment TX stats
		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;

		if (length >= sizeof(struct ip)) {
			// Copy the encrypted bytes into a fresh mbuf for ESP input
			error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					struct ip *ip = mtod(data, struct ip *);
					u_int ip_version = ip->ip_v;
					switch (ip_version) {
					case 4: {
						af = AF_INET;
						// esp4_input expects ip_len/ip_off in host order, payload length only
						ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip);
						ip->ip_off = ntohs(ip->ip_off);

						if (length < ip->ip_len) {
							printf("ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
							    pcb->ipsec_ifp->if_xname, length, ip->ip_len);
							STATS_INC(nifs, NETIF_STATS_BADLEN);
							STATS_INC(nifs, NETIF_STATS_DROPPED);
							mbuf_freem(data);
							data = NULL;
						} else {
							// esp4_input_extended consumes data and returns the decrypted mbuf (or NULL)
							data = esp4_input_extended(data, sizeof(struct ip), pcb->ipsec_ifp);
						}
						break;
					}
					case 6: {
						if (length < sizeof(struct ip6_hdr)) {
							printf("ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
							    pcb->ipsec_ifp->if_xname, length);
							STATS_INC(nifs, NETIF_STATS_BADLEN);
							STATS_INC(nifs, NETIF_STATS_DROPPED);
							mbuf_freem(data);
							data = NULL;
						} else {
							af = AF_INET6;
							struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
							const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
							if (length < ip6_len) {
								printf("ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
								    pcb->ipsec_ifp->if_xname, length, ip6_len);
								STATS_INC(nifs, NETIF_STATS_BADLEN);
								STATS_INC(nifs, NETIF_STATS_DROPPED);
								mbuf_freem(data);
								data = NULL;
							} else {
								// esp6_input_extended updates data in place via the pointer
								int offset = sizeof(struct ip6_hdr);
								esp6_input_extended(&data, &offset, ip6->ip6_nxt, pcb->ipsec_ifp);
							}
						}
						break;
					}
					default: {
						printf("ipsec_netif_sync_rx %s: unknown ip version %u\n",
						    pcb->ipsec_ifp->if_xname, ip_version);
						STATS_INC(nifs, NETIF_STATS_DROPPED);
						mbuf_freem(data);
						data = NULL;
						break;
					}
					}
				} else {
					printf("ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROPPED);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				printf("ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROPPED);
			}
		} else {
			printf("ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb->ipsec_ifp->if_xname, length);
			STATS_INC(nifs, NETIF_STATS_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROPPED);
		}

		if (data == NULL) {
			// Failed to get decrypted data
			kern_pbufpool_free(rx_pp, rx_ph);
			continue;
		}

		// Recompute length: decryption strips the ESP encapsulation
		length = mbuf_pkthdr_len(data);
		if (length > rx_pp->pp_buflet_size) {
			// Flush data
			mbuf_freem(data);
			kern_pbufpool_free(rx_pp, rx_ph);
			STATS_INC(nifs, NETIF_STATS_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROPPED);
			printf("ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
			    pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
			continue;
		}

		mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);

		// Fillout rx packet
		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
		VERIFY(rx_buf != NULL);
		void *rx_baddr = kern_buflet_get_object_address(rx_buf);
		VERIFY(rx_baddr != NULL);

		// Copy-in data from mbuf to buflet
		mbuf_copydata(data, 0, length, (void *)rx_baddr);
		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id

		// Finalize and attach the packet
		error = kern_buflet_set_data_offset(rx_buf, 0);
		VERIFY(error == 0);
		error = kern_buflet_set_data_length(rx_buf, length);
		VERIFY(error == 0);
		error = kern_packet_set_link_header_offset(rx_ph, 0);
		VERIFY(error == 0);
		error = kern_packet_set_network_header_offset(rx_ph, 0);
		VERIFY(error == 0);
		error = kern_packet_finalize(rx_ph);
		VERIFY(error == 0);
		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
		VERIFY(error == 0);

		STATS_INC(nifs, NETIF_STATS_RXPKTS);
		STATS_INC(nifs, NETIF_STATS_RXCOPY_DIRECT);
		bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

		rx_ring_stats.kcrsi_slots_transferred++;
		rx_ring_stats.kcrsi_bytes_transferred += length;

		if (!pcb->ipsec_ext_ifdata_stats) {
			ifnet_stat_increment_in(pcb->ipsec_ifp, 1, length, 0);
		}

		mbuf_freem(data);

		rx_pslot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

done:
	// Publish ring advances and accumulated stats
	if (rx_pslot) {
		kern_channel_advance_slot(rx_ring, rx_pslot);
		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
	}

	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	// Unlock first, then exit ring
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	if (tx_ring != NULL) {
		if (tx_pslot != NULL) {
			kern_channel_notify(tx_ring, 0);
		}
		kr_exit(tx_ring);
	}

	return 0;
}
1590
/*
 * ipsec_nexus_ifattach - register the netif nexus provider for an ipsec pcb
 * and allocate its net-provider instance (which creates the ifnet).
 *
 * Steps: create nexus attributes (slot/ring sizes from the pcb), create the
 * netif packet buffer pool, register the provider under
 * "com.apple.netif.<ifname>", then allocate the net provider instance.
 *
 * On success *ifp receives the new interface.  Note the success path falls
 * through to the `failed:` label: that label doubles as common cleanup
 * (the attribute object is always destroyed; the pbufpool only on error).
 *
 * Returns 0 on success, or the first error encountered.
 */
static errno_t
ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
    struct ifnet_init_eparams *init_params,
    struct ifnet **ifp)
{
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	struct kern_pbufpool_init pp_init;

	// Provider name is derived from the interface's external name
	nexus_name_t provider_name;
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", pcb->ipsec_if_xname);

	// Callback table for the netif nexus (TX/RX sync, doorbell, ring lifecycle)
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_netif_ring_init,
		.nxpi_ring_fini = ipsec_netif_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_netif_sync_tx,
		.nxpi_sync_rx = ipsec_netif_sync_rx,
		.nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
	};

	nexus_attr_t nxa = NULL;
	err = kern_nexus_attr_create(&nxa);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		printf("%s: kern_nexus_attr_create failed: %d\n",
		    __func__, err);
		goto failed;
	}

	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for netif nexus to limit memory usage
	uint64_t ring_size = pcb->ipsec_netif_ring_size;
	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(err == 0);
	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(err == 0);

	pcb->ipsec_netif_txring_size = ring_size;

	// Packet buffer pool: 2x the ring size, single-fragment buflets
	bzero(&pp_init, sizeof(pp_init));
	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2;
	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
	pp_init.kbi_max_frags = 1;
	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
	    "%s", provider_name);

	// Same init is used for both TX and RX pool parameters
	err = kern_pbufpool_create(&pp_init, &pp_init, &pcb->ipsec_netif_pp, NULL);
	if (err != 0) {
		printf("%s pbufbool create failed, error %d\n", __func__, err);
		goto failed;
	}

	err = kern_nexus_controller_register_provider(controller,
	    ipsec_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nxa,
	    &pcb->ipsec_nx.if_provider);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		printf("%s register provider failed, error %d\n",
		    __func__, err);
		goto failed;
	}

	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = ipsec_netif_prepare;
	net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    pcb->ipsec_nx.if_provider,
	    pcb,
	    &pcb->ipsec_nx.if_instance,
	    &net_init,
	    ifp);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		printf("%s alloc_net_provider_instance failed, %d\n",
		    __func__, err);
		// Roll back the provider registration before taking the cleanup path
		kern_nexus_controller_deregister_provider(controller,
		    pcb->ipsec_nx.if_provider);
		uuid_clear(pcb->ipsec_nx.if_provider);
		goto failed;
	}

failed:
	// Reached on both success and failure; nxa is always released,
	// the pbufpool only when an error occurred after its creation.
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}
	if (err && pcb->ipsec_netif_pp != NULL) {
		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
		pcb->ipsec_netif_pp = NULL;
	}
	return err;
}
1705
1706 static void
1707 ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
1708 {
1709 nexus_controller_t controller = kern_nexus_shared_controller();
1710 errno_t err;
1711
1712 if (!uuid_is_null(instance)) {
1713 err = kern_nexus_controller_free_provider_instance(controller,
1714 instance);
1715 if (err != 0) {
1716 printf("%s free_provider_instance failed %d\n",
1717 __func__, err);
1718 }
1719 uuid_clear(instance);
1720 }
1721 if (!uuid_is_null(provider)) {
1722 err = kern_nexus_controller_deregister_provider(controller,
1723 provider);
1724 if (err != 0) {
1725 printf("%s deregister_provider %d\n", __func__, err);
1726 }
1727 uuid_clear(provider);
1728 }
1729 return;
1730 }
1731
1732 static void
1733 ipsec_nexus_detach(struct ipsec_pcb *pcb)
1734 {
1735 ipsec_nx_t nx = &pcb->ipsec_nx;
1736 nexus_controller_t controller = kern_nexus_shared_controller();
1737 errno_t err;
1738
1739 if (!uuid_is_null(nx->ms_host)) {
1740 err = kern_nexus_ifdetach(controller,
1741 nx->ms_instance,
1742 nx->ms_host);
1743 if (err != 0) {
1744 printf("%s: kern_nexus_ifdetach ms host failed %d\n",
1745 __func__, err);
1746 }
1747 }
1748
1749 if (!uuid_is_null(nx->ms_device)) {
1750 err = kern_nexus_ifdetach(controller,
1751 nx->ms_instance,
1752 nx->ms_device);
1753 if (err != 0) {
1754 printf("%s: kern_nexus_ifdetach ms device failed %d\n",
1755 __func__, err);
1756 }
1757 }
1758
1759 ipsec_detach_provider_and_instance(nx->if_provider,
1760 nx->if_instance);
1761 ipsec_detach_provider_and_instance(nx->ms_provider,
1762 nx->ms_instance);
1763
1764 if (pcb->ipsec_netif_pp != NULL) {
1765 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
1766 pcb->ipsec_netif_pp = NULL;
1767 }
1768 memset(nx, 0, sizeof(*nx));
1769 }
1770
/*
 * ipsec_create_fs_provider_and_instance - register a flowswitch nexus
 * provider named "com.apple.<type_name>.<ifname>" and allocate one instance
 * of it.
 *
 * subtype selects the flowswitch extension type (NEXUS_ATTR_EXTENSIONS);
 * slot and ring sizes are taken from the pcb.  On success *provider and
 * *instance receive the new UUIDs.  On instance-allocation failure the
 * provider registration is rolled back and *provider cleared.
 *
 * Returns 0 on success, or the first error encountered.
 */
static errno_t
ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
    uint32_t subtype, const char *type_name,
    const char *ifname,
    uuid_t *provider, uuid_t *instance)
{
	nexus_attr_t attr = NULL;
	nexus_controller_t controller = kern_nexus_shared_controller();
	uuid_t dom_prov;
	errno_t err;
	struct kern_nexus_init init;
	nexus_name_t provider_name;

	err = kern_nexus_get_builtin_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
	    &dom_prov);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		printf("%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		printf("%s: kern_nexus_attr_create failed: %d\n",
		    __func__, err);
		goto failed;
	}

	// Flowswitch extension subtype (e.g. multistack)
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_EXTENSIONS, subtype);
	VERIFY(err == 0);

	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
	uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
	VERIFY(err == 0);
	uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, ifname);
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	// Attributes are copied by registration; release ours regardless of outcome
	kern_nexus_attr_destroy(attr);
	attr = NULL;
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		printf("%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL,
	    instance, &init);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		printf("%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		// Roll back the provider registration so no half-built state remains
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
	}
failed:
	return err;
}
1850
/*
 * ipsec_multistack_attach - create the multistack flowswitch for an ipsec
 * interface and wire it to the netif nexus.
 *
 * Creates the flowswitch provider/instance, attaches it to the netif's
 * device and host ports, then extracts the flowswitch's netagent UUID into
 * nx->ms_agent for later use.  The agent-UUID lookup is best-effort:
 * failures there are only logged and do not fail the attach.
 *
 * On any hard failure, all nexus state is torn down and the ifnet is
 * detached; an ifnet_detach failure at that point is unrecoverable and
 * panics.  Returns 0 on success, the failing step's errno otherwise.
 */
static errno_t
ipsec_multistack_attach(struct ipsec_pcb *pcb)
{
	nexus_controller_t controller = kern_nexus_shared_controller();
	errno_t err = 0;
	ipsec_nx_t nx = &pcb->ipsec_nx;

	// Allocate multistack flowswitch
	err = ipsec_create_fs_provider_and_instance(pcb,
	    NEXUS_EXTENSION_FSW_TYPE_MULTISTACK,
	    "multistack",
	    pcb->ipsec_ifp->if_xname,
	    &nx->ms_provider,
	    &nx->ms_instance);
	if (err != 0) {
		printf("%s: failed to create bridge provider and instance\n",
		    __func__);
		goto failed;
	}

	// Attach multistack to device port
	err = kern_nexus_ifattach(controller, nx->ms_instance,
	    NULL, nx->if_instance,
	    FALSE, &nx->ms_device);
	if (err != 0) {
		printf("%s kern_nexus_ifattach ms device %d\n", __func__, err);
		goto failed;
	}

	// Attach multistack to host port
	err = kern_nexus_ifattach(controller, nx->ms_instance,
	    NULL, nx->if_instance,
	    TRUE, &nx->ms_host);
	if (err != 0) {
		printf("%s kern_nexus_ifattach ms host %d\n", __func__, err);
		goto failed;
	}

	// Extract the agent UUID and save for later
	struct kern_nexus *multistack_nx = nx_find(nx->ms_instance, false);
	if (multistack_nx != NULL) {
		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(multistack_nx);
		if (flowswitch != NULL) {
			// Read the agent UUID under the flowswitch read lock
			FSW_RLOCK(flowswitch);
			struct fsw_ms_context *ms_context = (struct fsw_ms_context *)flowswitch->fsw_ops_private;
			if (ms_context != NULL) {
				uuid_copy(nx->ms_agent, ms_context->mc_agent_uuid);
			} else {
				printf("ipsec_multistack_attach - fsw_ms_context is NULL\n");
			}
			FSW_UNLOCK(flowswitch);
		} else {
			printf("ipsec_multistack_attach - flowswitch is NULL\n");
		}
		// Balance the reference taken by nx_find
		nx_release(multistack_nx);
	} else {
		printf("ipsec_multistack_attach - unable to find multistack nexus\n");
	}

	return 0;

failed:
	// Undo everything, including the ifnet created during nexus attach
	ipsec_nexus_detach(pcb);

	errno_t detach_error = 0;
	if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
		panic("ipsec_multistack_attach - ifnet_detach failed: %d\n", detach_error);
		/* NOT REACHED */
	}

	return err;
}
1923
1924 #pragma mark Kernel Pipe Nexus
1925
/*
 * ipsec_register_kernel_pipe_nexus - lazily register the shared kernel-pipe
 * nexus provider ("com.apple.nexus.ipsec.kpipe"), refcounted under
 * ipsec_lock.
 *
 * The first caller creates the nexus controller and registers the provider;
 * subsequent callers just bump ipsec_ncd_refcount.  On any failure the
 * controller is destroyed and the refcount reset to 0 so a later caller can
 * retry from scratch.
 *
 * Returns 0 on success (or when already registered), an errno otherwise.
 */
static errno_t
ipsec_register_kernel_pipe_nexus(void)
{
	nexus_attr_t nxa = NULL;
	errno_t result;

	lck_mtx_lock(&ipsec_lock);
	if (ipsec_ncd_refcount++) {
		// Already registered by an earlier caller; just count the reference
		lck_mtx_unlock(&ipsec_lock);
		return 0;
	}

	result = kern_nexus_controller_create(&ipsec_ncd);
	if (result) {
		printf("%s: kern_nexus_controller_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uuid_t dom_prov;
	result = kern_nexus_get_builtin_domain_provider(
		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
	if (result) {
		printf("%s: kern_nexus_get_builtin_domain_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	// Callback table for the kpipe nexus; no doorbell for kernel pipes
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_kpipe_ring_init,
		.nxpi_ring_fini = ipsec_kpipe_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_kpipe_sync_tx,
		.nxpi_sync_rx = ipsec_kpipe_sync_rx,
		.nxpi_tx_doorbell = NULL,
	};

	result = kern_nexus_attr_create(&nxa);
	if (result) {
		printf("%s: kern_nexus_attr_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(result == 0);

	// Reset ring size for kernel pipe nexus to limit memory usage
	uint64_t ring_size = if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(result == 0);
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(result == 0);

	result = kern_nexus_controller_register_provider(ipsec_ncd,
	    dom_prov,
	    (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
	    &prov_init,
	    sizeof(prov_init),
	    nxa,
	    &ipsec_kpipe_uuid);
	if (result) {
		printf("%s: kern_nexus_controller_register_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

done:
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}

	if (result) {
		// Failure: undo the controller and the refcount taken above
		if (ipsec_ncd) {
			kern_nexus_controller_destroy(ipsec_ncd);
			ipsec_ncd = NULL;
		}
		ipsec_ncd_refcount = 0;
	}

	lck_mtx_unlock(&ipsec_lock);

	return result;
}
2018
2019 static void
2020 ipsec_unregister_kernel_pipe_nexus(void)
2021 {
2022 lck_mtx_lock(&ipsec_lock);
2023
2024 VERIFY(ipsec_ncd_refcount > 0);
2025
2026 if (--ipsec_ncd_refcount == 0) {
2027 kern_nexus_controller_destroy(ipsec_ncd);
2028 ipsec_ncd = NULL;
2029 }
2030
2031 lck_mtx_unlock(&ipsec_lock);
2032 }
2033
// For use by socket option, not internally
/*
 * ipsec_disable_channel - tear down the pcb's kernel-pipe channel.
 *
 * Captures and clears the enabled flag and kpipe UUID under the exclusive
 * pcb lock, then (outside the lock) frees the provider instance, destroys
 * the kpipe pbufpool, and drops the shared nexus reference.
 *
 * Returns 0 on success, ENXIO if no channel was enabled, or the
 * free_provider_instance error.
 */
static errno_t
ipsec_disable_channel(struct ipsec_pcb *pcb)
{
	errno_t result;
	int enabled;
	uuid_t uuid;

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

	// Snapshot state under the lock, then clear it atomically
	enabled = pcb->ipsec_kpipe_enabled;
	uuid_copy(uuid, pcb->ipsec_kpipe_uuid);

	// Invariant: UUID is null iff the channel is disabled
	VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid) == !enabled);

	pcb->ipsec_kpipe_enabled = 0;
	uuid_clear(pcb->ipsec_kpipe_uuid);

	lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

	// Free the instance outside the pcb lock
	if (enabled) {
		result = kern_nexus_controller_free_provider_instance(ipsec_ncd, uuid);
	} else {
		result = ENXIO;
	}

	if (!result) {
		if (pcb->ipsec_kpipe_pp != NULL) {
			kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
			pcb->ipsec_kpipe_pp = NULL;
		}
		// Balance the reference taken at enable time
		ipsec_unregister_kernel_pipe_nexus();
	}

	return result;
}
2070
/*
 * Enable a kernel-pipe channel on this PCB for the calling process.
 *
 * Requires the PRIV_SKYWALK_REGISTER_KERNEL_PIPE privilege. Takes a
 * reference on the shared kpipe nexus controller, allocates a packet
 * buffer pool and a provider instance, and binds the instance to the
 * caller's pid. On any failure after registration, the pool and the
 * nexus-controller reference are released again.
 *
 * Returns 0 on success, or an errno (EOPNOTSUPP if the PCB is not in
 * netif mode, EEXIST if a channel is already enabled, or the failure
 * from the underlying nexus/pbufpool calls).
 */
static errno_t
ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
{
	struct kern_nexus_init init;
	struct kern_pbufpool_init pp_init;
	errno_t result;

	/* Privileged operation: caller must be allowed to register kernel pipes. */
	kauth_cred_t cred = kauth_cred_get();
	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
	if (result) {
		return result;
	}

	/* Take a reference on (and lazily create) the shared kpipe nexus. */
	result = ipsec_register_kernel_pipe_nexus();
	if (result) {
		return result;
	}

	VERIFY(ipsec_ncd);

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

	/* ipsec driver doesn't support channels without a netif */
	if (!pcb->ipsec_use_netif) {
		result = EOPNOTSUPP;
		goto done;
	}

	if (pcb->ipsec_kpipe_enabled) {
		result = EEXIST; // return success instead?
		goto done;
	}

	/*
	 * Size the buffer pool at twice the netif ring size (one ring's
	 * worth each for TX and RX), one buffer per packet.
	 */
	bzero(&pp_init, sizeof(pp_init));
	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2;
	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
	pp_init.kbi_max_frags = 1;
	pp_init.kbi_flags |= KBIF_QUANTUM;
	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
	    "com.apple.kpipe.%s", pcb->ipsec_if_xname);

	result = kern_pbufpool_create(&pp_init, &pp_init, &pcb->ipsec_kpipe_pp,
	    NULL);
	if (result != 0) {
		/* (message typo "pbufbool" predates this review) */
		printf("%s pbufbool create failed, error %d\n", __func__, result);
		goto done;
	}

	/* Allocate the provider instance; its UUID lands in ipsec_kpipe_uuid. */
	VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid));
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;
	result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
	    ipsec_kpipe_uuid, pcb, &pcb->ipsec_kpipe_uuid, &init);
	if (result) {
		goto done;
	}

	/* Restrict the channel's client port to the requesting process. */
	nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
	result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
	    pcb->ipsec_kpipe_uuid, &port,
	    proc_pid(proc), NULL, NULL, 0, NEXUS_BIND_PID);
	if (result) {
		/* Undo the instance allocation; pool/nexus cleanup below. */
		kern_nexus_controller_free_provider_instance(ipsec_ncd,
		    pcb->ipsec_kpipe_uuid);
		uuid_clear(pcb->ipsec_kpipe_uuid);
		goto done;
	}

	pcb->ipsec_kpipe_enabled = 1;

done:
	lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

	/* On failure, release the pool (if created) and our nexus reference. */
	if (result) {
		if (pcb->ipsec_kpipe_pp != NULL) {
			kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
			pcb->ipsec_kpipe_pp = NULL;
		}
		ipsec_unregister_kernel_pipe_nexus();
	}

	return result;
}
2157
2158 #endif // IPSEC_NEXUS
2159
2160
2161 /* Kernel control functions */
2162
/*
 * Release all resources owned by an ipsec PCB and free it.
 *
 * pcb:     the control block to destroy; must no longer be reachable
 *          by the data path.
 * in_list: true if the PCB is linked on ipsec_head and must be removed
 *          (under ipsec_lock) before it is freed.
 */
static inline void
ipsec_free_pcb(struct ipsec_pcb *pcb, bool in_list)
{
#if IPSEC_NEXUS
	/* Drop any mbufs still queued on the input chain, then its lock. */
	mbuf_freem_list(pcb->ipsec_input_chain);
	lck_mtx_destroy(&pcb->ipsec_input_chain_lock, ipsec_lck_grp);
#endif // IPSEC_NEXUS
	lck_rw_destroy(&pcb->ipsec_pcb_lock, ipsec_lck_grp);
	if (in_list) {
		lck_mtx_lock(&ipsec_lock);
		TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
		lck_mtx_unlock(&ipsec_lock);
	}
	zfree(ipsec_pcb_zone, pcb);
}
2178
2179 static errno_t
2180 ipsec_ctl_bind(kern_ctl_ref kctlref,
2181 struct sockaddr_ctl *sac,
2182 void **unitinfo)
2183 {
2184 struct ipsec_pcb *pcb = zalloc(ipsec_pcb_zone);
2185 memset(pcb, 0, sizeof(*pcb));
2186
2187 /* Setup the protocol control block */
2188 *unitinfo = pcb;
2189 pcb->ipsec_ctlref = kctlref;
2190 pcb->ipsec_unit = sac->sc_unit;
2191 pcb->ipsec_output_service_class = MBUF_SC_OAM;
2192
2193 #if IPSEC_NEXUS
2194 pcb->ipsec_use_netif = false;
2195 pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
2196 pcb->ipsec_netif_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
2197 pcb->ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
2198 pcb->ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
2199 #endif // IPSEC_NEXUS
2200
2201 lck_rw_init(&pcb->ipsec_pcb_lock, ipsec_lck_grp, ipsec_lck_attr);
2202 #if IPSEC_NEXUS
2203 lck_mtx_init(&pcb->ipsec_input_chain_lock, ipsec_lck_grp, ipsec_lck_attr);
2204 #endif // IPSEC_NEXUS
2205
2206 return 0;
2207 }
2208
2209 static errno_t
2210 ipsec_ctl_connect(kern_ctl_ref kctlref,
2211 struct sockaddr_ctl *sac,
2212 void **unitinfo)
2213 {
2214 struct ifnet_init_eparams ipsec_init = {};
2215 errno_t result = 0;
2216
2217 if (*unitinfo == NULL) {
2218 (void)ipsec_ctl_bind(kctlref, sac, unitinfo);
2219 }
2220
2221 struct ipsec_pcb *pcb = *unitinfo;
2222
2223 lck_mtx_lock(&ipsec_lock);
2224
2225 /* Find some open interface id */
2226 u_int32_t chosen_unique_id = 1;
2227 struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
2228 if (next_pcb != NULL) {
2229 /* List was not empty, add one to the last item */
2230 chosen_unique_id = next_pcb->ipsec_unique_id + 1;
2231 next_pcb = NULL;
2232
2233 /*
2234 * If this wrapped the id number, start looking at
2235 * the front of the list for an unused id.
2236 */
2237 if (chosen_unique_id == 0) {
2238 /* Find the next unused ID */
2239 chosen_unique_id = 1;
2240 TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
2241 if (next_pcb->ipsec_unique_id > chosen_unique_id) {
2242 /* We found a gap */
2243 break;
2244 }
2245
2246 chosen_unique_id = next_pcb->ipsec_unique_id + 1;
2247 }
2248 }
2249 }
2250
2251 pcb->ipsec_unique_id = chosen_unique_id;
2252
2253 if (next_pcb != NULL) {
2254 TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
2255 } else {
2256 TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
2257 }
2258 lck_mtx_unlock(&ipsec_lock);
2259
2260 snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
2261 snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
2262 printf("ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);
2263
2264 /* Create the interface */
2265 bzero(&ipsec_init, sizeof(ipsec_init));
2266 ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
2267 ipsec_init.len = sizeof(ipsec_init);
2268
2269 #if IPSEC_NEXUS
2270 if (pcb->ipsec_use_netif) {
2271 ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
2272 } else
2273 #endif // IPSEC_NEXUS
2274 {
2275 ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
2276 ipsec_init.start = ipsec_start;
2277 }
2278 ipsec_init.name = "ipsec";
2279 ipsec_init.unit = pcb->ipsec_unit - 1;
2280 ipsec_init.uniqueid = pcb->ipsec_unique_name;
2281 ipsec_init.uniqueid_len = strlen(pcb->ipsec_unique_name);
2282 ipsec_init.family = ipsec_family;
2283 ipsec_init.subfamily = IFNET_SUBFAMILY_IPSEC;
2284 ipsec_init.type = IFT_OTHER;
2285 ipsec_init.demux = ipsec_demux;
2286 ipsec_init.add_proto = ipsec_add_proto;
2287 ipsec_init.del_proto = ipsec_del_proto;
2288 ipsec_init.softc = pcb;
2289 ipsec_init.ioctl = ipsec_ioctl;
2290 ipsec_init.detach = ipsec_detached;
2291
2292 #if IPSEC_NEXUS
2293 if (pcb->ipsec_use_netif) {
2294 result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
2295 if (result != 0) {
2296 printf("ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
2297 ipsec_free_pcb(pcb, true);
2298 *unitinfo = NULL;
2299 return result;
2300 }
2301
2302 result = ipsec_multistack_attach(pcb);
2303 if (result != 0) {
2304 printf("ipsec_ctl_connect - ipsec_multistack_attach failed: %d\n", result);
2305 *unitinfo = NULL;
2306 return result;
2307 }
2308
2309 /* Attach to bpf */
2310 bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
2311 } else
2312 #endif // IPSEC_NEXUS
2313 {
2314 result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
2315 if (result != 0) {
2316 printf("ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
2317 ipsec_free_pcb(pcb, true);
2318 *unitinfo = NULL;
2319 return result;
2320 }
2321 ipsec_ifnet_set_attrs(pcb->ipsec_ifp);
2322
2323 /* Attach the interface */
2324 result = ifnet_attach(pcb->ipsec_ifp, NULL);
2325 if (result != 0) {
2326 printf("ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
2327 ifnet_release(pcb->ipsec_ifp);
2328 ipsec_free_pcb(pcb, true);
2329 *unitinfo = NULL;
2330 return result;
2331 }
2332
2333 /* Attach to bpf */
2334 bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
2335 }
2336
2337 /* The interfaces resoures allocated, mark it as running */
2338 ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);
2339
2340 return 0;
2341 }
2342
2343 static errno_t
2344 ipsec_detach_ip(ifnet_t interface,
2345 protocol_family_t protocol,
2346 socket_t pf_socket)
2347 {
2348 errno_t result = EPROTONOSUPPORT;
2349
2350 /* Attempt a detach */
2351 if (protocol == PF_INET) {
2352 struct ifreq ifr;
2353
2354 bzero(&ifr, sizeof(ifr));
2355 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2356 ifnet_name(interface), ifnet_unit(interface));
2357
2358 result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
2359 } else if (protocol == PF_INET6) {
2360 struct in6_ifreq ifr6;
2361
2362 bzero(&ifr6, sizeof(ifr6));
2363 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2364 ifnet_name(interface), ifnet_unit(interface));
2365
2366 result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2367 }
2368
2369 return result;
2370 }
2371
2372 static void
2373 ipsec_remove_address(ifnet_t interface,
2374 protocol_family_t protocol,
2375 ifaddr_t address,
2376 socket_t pf_socket)
2377 {
2378 errno_t result = 0;
2379
2380 /* Attempt a detach */
2381 if (protocol == PF_INET) {
2382 struct ifreq ifr;
2383
2384 bzero(&ifr, sizeof(ifr));
2385 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2386 ifnet_name(interface), ifnet_unit(interface));
2387 result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2388 if (result != 0) {
2389 printf("ipsec_remove_address - ifaddr_address failed: %d", result);
2390 } else {
2391 result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2392 if (result != 0) {
2393 printf("ipsec_remove_address - SIOCDIFADDR failed: %d", result);
2394 }
2395 }
2396 } else if (protocol == PF_INET6) {
2397 struct in6_ifreq ifr6;
2398
2399 bzero(&ifr6, sizeof(ifr6));
2400 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2401 ifnet_name(interface), ifnet_unit(interface));
2402 result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
2403 sizeof(ifr6.ifr_addr));
2404 if (result != 0) {
2405 printf("ipsec_remove_address - ifaddr_address failed (v6): %d",
2406 result);
2407 } else {
2408 result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2409 if (result != 0) {
2410 printf("ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
2411 result);
2412 }
2413 }
2414 }
2415 }
2416
2417 static void
2418 ipsec_cleanup_family(ifnet_t interface,
2419 protocol_family_t protocol)
2420 {
2421 errno_t result = 0;
2422 socket_t pf_socket = NULL;
2423 ifaddr_t *addresses = NULL;
2424 int i;
2425
2426 if (protocol != PF_INET && protocol != PF_INET6) {
2427 printf("ipsec_cleanup_family - invalid protocol family %d\n", protocol);
2428 return;
2429 }
2430
2431 /* Create a socket for removing addresses and detaching the protocol */
2432 result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2433 if (result != 0) {
2434 if (result != EAFNOSUPPORT) {
2435 printf("ipsec_cleanup_family - failed to create %s socket: %d\n",
2436 protocol == PF_INET ? "IP" : "IPv6", result);
2437 }
2438 goto cleanup;
2439 }
2440
2441 /* always set SS_PRIV, we want to close and detach regardless */
2442 sock_setpriv(pf_socket, 1);
2443
2444 result = ipsec_detach_ip(interface, protocol, pf_socket);
2445 if (result == 0 || result == ENXIO) {
2446 /* We are done! We either detached or weren't attached. */
2447 goto cleanup;
2448 } else if (result != EBUSY) {
2449 /* Uh, not really sure what happened here... */
2450 printf("ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2451 goto cleanup;
2452 }
2453
2454 /*
2455 * At this point, we received an EBUSY error. This means there are
2456 * addresses attached. We should detach them and then try again.
2457 */
2458 result = ifnet_get_address_list_family(interface, &addresses, protocol);
2459 if (result != 0) {
2460 printf("fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2461 ifnet_name(interface), ifnet_unit(interface),
2462 protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
2463 goto cleanup;
2464 }
2465
2466 for (i = 0; addresses[i] != 0; i++) {
2467 ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
2468 }
2469 ifnet_free_address_list(addresses);
2470 addresses = NULL;
2471
2472 /*
2473 * The addresses should be gone, we should try the remove again.
2474 */
2475 result = ipsec_detach_ip(interface, protocol, pf_socket);
2476 if (result != 0 && result != ENXIO) {
2477 printf("ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2478 }
2479
2480 cleanup:
2481 if (pf_socket != NULL) {
2482 sock_close(pf_socket);
2483 }
2484
2485 if (addresses != NULL) {
2486 ifnet_free_address_list(addresses);
2487 }
2488 }
2489
/*
 * Kernel control disconnect handler: tear down the interface and the
 * kernel-pipe channel associated with this control socket.
 *
 * The ordering here is deliberate and fragile: in netif mode the
 * interface is detached *while holding an extra I/O reference* so that
 * addresses/protocols can still be cleaned up through it before the
 * final reference is dropped; in classic mode the detach happens last
 * and final teardown runs asynchronously via ipsec_detached().
 *
 * Always returns 0 (a NULL unitinfo returns EINVAL).
 */
static errno_t
ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo)
{
	struct ipsec_pcb *pcb = unitinfo;
	ifnet_t ifp = NULL;
	errno_t result = 0;

	if (pcb == NULL) {
		return EINVAL;
	}

#if IPSEC_NEXUS
	// Tell the nexus to stop all rings
	if (pcb->ipsec_netif_nexus != NULL) {
		kern_nexus_stop(pcb->ipsec_netif_nexus);
	}
#endif // IPSEC_NEXUS

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
	/*
	 * Snapshot and clear the kpipe state under the lock; the provider
	 * instance itself is freed after the lock is dropped, below.
	 */
	uuid_t kpipe_uuid;
	uuid_copy(kpipe_uuid, pcb->ipsec_kpipe_uuid);
	uuid_clear(pcb->ipsec_kpipe_uuid);
	pcb->ipsec_kpipe_enabled = FALSE;
#endif // IPSEC_NEXUS

	pcb->ipsec_ctlref = NULL;

	ifp = pcb->ipsec_ifp;
	if (ifp != NULL) {
#if IPSEC_NEXUS
		if (pcb->ipsec_netif_nexus != NULL) {
			/*
			 * Quiesce the interface and flush any pending outbound packets.
			 */
			if_down(ifp);

			/* Increment refcnt, but detach interface */
			ifnet_incr_iorefcnt(ifp);
			if ((result = ifnet_detach(ifp)) != 0) {
				panic("ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
				/* NOT REACHED */
			}

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

			/* Free the kpipe provider instance captured above, if any. */
			if (!uuid_is_null(kpipe_uuid)) {
				if (kern_nexus_controller_free_provider_instance(ipsec_ncd, kpipe_uuid) == 0) {
					if (pcb->ipsec_kpipe_pp != NULL) {
						kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
						pcb->ipsec_kpipe_pp = NULL;
					}
					ipsec_unregister_kernel_pipe_nexus();
				}
			}
			ipsec_nexus_detach(pcb);

			/* Decrement refcnt to finish detaching and freeing */
			ifnet_decr_iorefcnt(ifp);
		} else
#endif // IPSEC_NEXUS
		{
			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
			/* Free the kpipe provider instance captured above, if any. */
			if (!uuid_is_null(kpipe_uuid)) {
				if (kern_nexus_controller_free_provider_instance(ipsec_ncd, kpipe_uuid) == 0) {
					if (pcb->ipsec_kpipe_pp != NULL) {
						kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
						pcb->ipsec_kpipe_pp = NULL;
					}
					ipsec_unregister_kernel_pipe_nexus();
				}
			}
#endif // IPSEC_NEXUS

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			/*
			 * Detach now; ipsec_detach() will be called asynchronously once
			 * the I/O reference count drops to 0. There we will invoke
			 * ifnet_release().
			 */
			if ((result = ifnet_detach(ifp)) != 0) {
				printf("ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
			}
		}
	} else {
		// Bound, but not connected
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		ipsec_free_pcb(pcb, false);
	}

	return 0;
}
2608
2609 static errno_t
2610 ipsec_ctl_send(__unused kern_ctl_ref kctlref,
2611 __unused u_int32_t unit,
2612 __unused void *unitinfo,
2613 mbuf_t m,
2614 __unused int flags)
2615 {
2616 /* Receive messages from the control socket. Currently unused. */
2617 mbuf_freem(m);
2618 return 0;
2619 }
2620
2621 static errno_t
2622 ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,
2623 __unused u_int32_t unit,
2624 void *unitinfo,
2625 int opt,
2626 void *data,
2627 size_t len)
2628 {
2629 struct ipsec_pcb *pcb = unitinfo;
2630 errno_t result = 0;
2631
2632 /* check for privileges for privileged options */
2633 switch (opt) {
2634 case IPSEC_OPT_FLAGS:
2635 case IPSEC_OPT_EXT_IFDATA_STATS:
2636 case IPSEC_OPT_SET_DELEGATE_INTERFACE:
2637 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
2638 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
2639 return EPERM;
2640 }
2641 break;
2642 }
2643
2644 switch (opt) {
2645 case IPSEC_OPT_FLAGS:
2646 if (len != sizeof(u_int32_t)) {
2647 result = EMSGSIZE;
2648 } else {
2649 pcb->ipsec_flags = *(u_int32_t *)data;
2650 }
2651 break;
2652
2653 case IPSEC_OPT_EXT_IFDATA_STATS:
2654 if (len != sizeof(int)) {
2655 result = EMSGSIZE;
2656 break;
2657 }
2658 if (pcb->ipsec_ifp == NULL) {
2659 // Only can set after connecting
2660 result = EINVAL;
2661 break;
2662 }
2663 pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
2664 break;
2665
2666 case IPSEC_OPT_INC_IFDATA_STATS_IN:
2667 case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
2668 struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
2669
2670 if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
2671 result = EINVAL;
2672 break;
2673 }
2674 if (pcb->ipsec_ifp == NULL) {
2675 // Only can set after connecting
2676 result = EINVAL;
2677 break;
2678 }
2679 if (!pcb->ipsec_ext_ifdata_stats) {
2680 result = EINVAL;
2681 break;
2682 }
2683 if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) {
2684 ifnet_stat_increment_in(pcb->ipsec_ifp, utsp->utsp_packets,
2685 utsp->utsp_bytes, utsp->utsp_errors);
2686 } else {
2687 ifnet_stat_increment_out(pcb->ipsec_ifp, utsp->utsp_packets,
2688 utsp->utsp_bytes, utsp->utsp_errors);
2689 }
2690 break;
2691 }
2692
2693 case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
2694 ifnet_t del_ifp = NULL;
2695 char name[IFNAMSIZ];
2696
2697 if (len > IFNAMSIZ - 1) {
2698 result = EMSGSIZE;
2699 break;
2700 }
2701 if (pcb->ipsec_ifp == NULL) {
2702 // Only can set after connecting
2703 result = EINVAL;
2704 break;
2705 }
2706 if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */
2707 bcopy(data, name, len);
2708 name[len] = 0;
2709 result = ifnet_find_by_name(name, &del_ifp);
2710 }
2711 if (result == 0) {
2712 printf("%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
2713 __func__, pcb->ipsec_ifp->if_xname,
2714 del_ifp ? del_ifp->if_xname : "NULL");
2715
2716 result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
2717 if (del_ifp) {
2718 ifnet_release(del_ifp);
2719 }
2720 }
2721 break;
2722 }
2723
2724 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
2725 if (len != sizeof(int)) {
2726 result = EMSGSIZE;
2727 break;
2728 }
2729 if (pcb->ipsec_ifp == NULL) {
2730 // Only can set after connecting
2731 result = EINVAL;
2732 break;
2733 }
2734 mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
2735 if (output_service_class == MBUF_SC_UNSPEC) {
2736 pcb->ipsec_output_service_class = MBUF_SC_OAM;
2737 } else {
2738 pcb->ipsec_output_service_class = output_service_class;
2739 }
2740 printf("%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
2741 __func__, pcb->ipsec_ifp->if_xname,
2742 pcb->ipsec_output_service_class);
2743 break;
2744 }
2745
2746 #if IPSEC_NEXUS
2747 case IPSEC_OPT_ENABLE_CHANNEL: {
2748 if (len != sizeof(int)) {
2749 result = EMSGSIZE;
2750 break;
2751 }
2752 if (pcb->ipsec_ifp == NULL) {
2753 // Only can set after connecting
2754 result = EINVAL;
2755 break;
2756 }
2757 if (*(int *)data) {
2758 result = ipsec_enable_channel(pcb, current_proc());
2759 } else {
2760 result = ipsec_disable_channel(pcb);
2761 }
2762 break;
2763 }
2764
2765 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
2766 if (len != sizeof(int)) {
2767 result = EMSGSIZE;
2768 break;
2769 }
2770 if (pcb->ipsec_ifp == NULL) {
2771 // Only can set after connecting
2772 result = EINVAL;
2773 break;
2774 }
2775 if (!if_is_netagent_enabled()) {
2776 result = ENOTSUP;
2777 break;
2778 }
2779 if (uuid_is_null(pcb->ipsec_nx.ms_agent)) {
2780 result = ENOENT;
2781 break;
2782 }
2783
2784 if (*(int *)data) {
2785 if_add_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.ms_agent);
2786 pcb->ipsec_needs_netagent = true;
2787 } else {
2788 pcb->ipsec_needs_netagent = false;
2789 if_delete_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.ms_agent);
2790 }
2791 break;
2792 }
2793
2794 case IPSEC_OPT_INPUT_FRAG_SIZE: {
2795 if (len != sizeof(u_int32_t)) {
2796 result = EMSGSIZE;
2797 break;
2798 }
2799 u_int32_t input_frag_size = *(u_int32_t *)data;
2800 if (input_frag_size <= sizeof(struct ip6_hdr)) {
2801 pcb->ipsec_frag_size_set = FALSE;
2802 pcb->ipsec_input_frag_size = 0;
2803 } else {
2804 printf("SET FRAG SIZE TO %u\n", input_frag_size);
2805 pcb->ipsec_frag_size_set = TRUE;
2806 pcb->ipsec_input_frag_size = input_frag_size;
2807 }
2808 break;
2809 }
2810 case IPSEC_OPT_ENABLE_NETIF: {
2811 if (len != sizeof(int)) {
2812 result = EMSGSIZE;
2813 break;
2814 }
2815 if (pcb->ipsec_ifp != NULL) {
2816 // Only can set before connecting
2817 result = EINVAL;
2818 break;
2819 }
2820 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2821 pcb->ipsec_use_netif = !!(*(int *)data);
2822 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2823 break;
2824 }
2825 case IPSEC_OPT_SLOT_SIZE: {
2826 if (len != sizeof(u_int32_t)) {
2827 result = EMSGSIZE;
2828 break;
2829 }
2830 if (pcb->ipsec_ifp != NULL) {
2831 // Only can set before connecting
2832 result = EINVAL;
2833 break;
2834 }
2835 u_int32_t slot_size = *(u_int32_t *)data;
2836 if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
2837 slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
2838 return EINVAL;
2839 }
2840 pcb->ipsec_slot_size = slot_size;
2841 break;
2842 }
2843 case IPSEC_OPT_NETIF_RING_SIZE: {
2844 if (len != sizeof(u_int32_t)) {
2845 result = EMSGSIZE;
2846 break;
2847 }
2848 if (pcb->ipsec_ifp != NULL) {
2849 // Only can set before connecting
2850 result = EINVAL;
2851 break;
2852 }
2853 u_int32_t ring_size = *(u_int32_t *)data;
2854 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
2855 ring_size > IPSEC_IF_MAX_RING_SIZE) {
2856 return EINVAL;
2857 }
2858 pcb->ipsec_netif_ring_size = ring_size;
2859 break;
2860 }
2861 case IPSEC_OPT_TX_FSW_RING_SIZE: {
2862 if (len != sizeof(u_int32_t)) {
2863 result = EMSGSIZE;
2864 break;
2865 }
2866 if (pcb->ipsec_ifp != NULL) {
2867 // Only can set before connecting
2868 result = EINVAL;
2869 break;
2870 }
2871 u_int32_t ring_size = *(u_int32_t *)data;
2872 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
2873 ring_size > IPSEC_IF_MAX_RING_SIZE) {
2874 return EINVAL;
2875 }
2876 pcb->ipsec_tx_fsw_ring_size = ring_size;
2877 break;
2878 }
2879 case IPSEC_OPT_RX_FSW_RING_SIZE: {
2880 if (len != sizeof(u_int32_t)) {
2881 result = EMSGSIZE;
2882 break;
2883 }
2884 if (pcb->ipsec_ifp != NULL) {
2885 // Only can set before connecting
2886 result = EINVAL;
2887 break;
2888 }
2889 u_int32_t ring_size = *(u_int32_t *)data;
2890 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
2891 ring_size > IPSEC_IF_MAX_RING_SIZE) {
2892 return EINVAL;
2893 }
2894 pcb->ipsec_rx_fsw_ring_size = ring_size;
2895 break;
2896 }
2897
2898 #endif // IPSEC_NEXUS
2899
2900 default:
2901 result = ENOPROTOOPT;
2902 break;
2903 }
2904
2905 return result;
2906 }
2907
2908 static errno_t
2909 ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
2910 __unused u_int32_t unit,
2911 void *unitinfo,
2912 int opt,
2913 void *data,
2914 size_t *len)
2915 {
2916 struct ipsec_pcb *pcb = unitinfo;
2917 errno_t result = 0;
2918
2919 switch (opt) {
2920 case IPSEC_OPT_FLAGS: {
2921 if (*len != sizeof(u_int32_t)) {
2922 result = EMSGSIZE;
2923 } else {
2924 *(u_int32_t *)data = pcb->ipsec_flags;
2925 }
2926 break;
2927 }
2928
2929 case IPSEC_OPT_EXT_IFDATA_STATS: {
2930 if (*len != sizeof(int)) {
2931 result = EMSGSIZE;
2932 } else {
2933 *(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
2934 }
2935 break;
2936 }
2937
2938 case IPSEC_OPT_IFNAME: {
2939 if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) {
2940 result = EMSGSIZE;
2941 } else {
2942 if (pcb->ipsec_ifp == NULL) {
2943 // Only can get after connecting
2944 result = EINVAL;
2945 break;
2946 }
2947 *len = snprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
2948 }
2949 break;
2950 }
2951
2952 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
2953 if (*len != sizeof(int)) {
2954 result = EMSGSIZE;
2955 } else {
2956 *(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
2957 }
2958 break;
2959 }
2960
2961 #if IPSEC_NEXUS
2962
2963 case IPSEC_OPT_ENABLE_CHANNEL: {
2964 if (*len != sizeof(int)) {
2965 result = EMSGSIZE;
2966 } else {
2967 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2968 *(int *)data = pcb->ipsec_kpipe_enabled;
2969 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2970 }
2971 break;
2972 }
2973
2974 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
2975 if (*len != sizeof(int)) {
2976 result = EMSGSIZE;
2977 } else {
2978 *(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.ms_agent);
2979 }
2980 break;
2981 }
2982
2983 case IPSEC_OPT_ENABLE_NETIF: {
2984 if (*len != sizeof(int)) {
2985 result = EMSGSIZE;
2986 } else {
2987 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2988 *(int *)data = !!pcb->ipsec_use_netif;
2989 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2990 }
2991 break;
2992 }
2993
2994 case IPSEC_OPT_GET_CHANNEL_UUID: {
2995 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2996 if (uuid_is_null(pcb->ipsec_kpipe_uuid)) {
2997 result = ENXIO;
2998 } else if (*len != sizeof(uuid_t)) {
2999 result = EMSGSIZE;
3000 } else {
3001 uuid_copy(data, pcb->ipsec_kpipe_uuid);
3002 }
3003 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3004 break;
3005 }
3006
3007 case IPSEC_OPT_INPUT_FRAG_SIZE: {
3008 if (*len != sizeof(u_int32_t)) {
3009 result = EMSGSIZE;
3010 } else {
3011 *(u_int32_t *)data = pcb->ipsec_input_frag_size;
3012 }
3013 break;
3014 }
3015 case IPSEC_OPT_SLOT_SIZE: {
3016 if (*len != sizeof(u_int32_t)) {
3017 result = EMSGSIZE;
3018 } else {
3019 *(u_int32_t *)data = pcb->ipsec_slot_size;
3020 }
3021 break;
3022 }
3023 case IPSEC_OPT_NETIF_RING_SIZE: {
3024 if (*len != sizeof(u_int32_t)) {
3025 result = EMSGSIZE;
3026 } else {
3027 *(u_int32_t *)data = pcb->ipsec_netif_ring_size;
3028 }
3029 break;
3030 }
3031 case IPSEC_OPT_TX_FSW_RING_SIZE: {
3032 if (*len != sizeof(u_int32_t)) {
3033 result = EMSGSIZE;
3034 } else {
3035 *(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
3036 }
3037 break;
3038 }
3039 case IPSEC_OPT_RX_FSW_RING_SIZE: {
3040 if (*len != sizeof(u_int32_t)) {
3041 result = EMSGSIZE;
3042 } else {
3043 *(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
3044 }
3045 break;
3046 }
3047
3048 #endif // IPSEC_NEXUS
3049
3050 default: {
3051 result = ENOPROTOOPT;
3052 break;
3053 }
3054 }
3055
3056 return result;
3057 }
3058
3059 /* Network Interface functions */
/*
 * Classic (mbuf) output path for the ipsec interface.
 *
 * Applies IPsec transforms to the outbound packet, then hands the
 * encrypted result to ip_output()/ip6_output(). Ownership of `data`
 * transfers to the IPsec/IP layers on success; on any error the mbuf
 * is freed here (ipsec_output_err).
 *
 * Returns 0 on success or if the packet was consumed by tunneling;
 * ENOBUFS when flow advisory asks us to throttle (output is disabled
 * on the interface until the flow resumes); other errno on failure.
 */
static errno_t
ipsec_output(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	struct ipsec_output_state ipsec_state;
	struct route ro;
	struct route_in6 ro6;
	int length;
	struct ip *ip;
	struct ip6_hdr *ip6;
	struct ip_out_args ipoa;
	struct ip6_out_args ip6oa;
	int error = 0;
	u_int ip_version = 0;
	int flags = 0;
	struct flowadv *adv = NULL;

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = EINVAL;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	/* The IP version nibble is in the same place for v4 and v6 headers. */
	ip = mtod(data, struct ip *);
	ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
#if IPSEC_NEXUS
		/* In netif mode bpf tapping happens in the nexus path instead. */
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Apply encryption */
		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec4_interface_output(&ipsec_state, interface);
		/* Tunneled in IPv6 - packet is gone */
		if (error == 0 && ipsec_state.tunneled == 6) {
			goto done;
		}

		/* The transform may have replaced the mbuf chain. */
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				printf("ipsec_output: ipsec4_output error %d.\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
		data->m_pkthdr.pkt_proto = ip->ip_p;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Flip endian-ness for ip_output */
		ip = mtod(data, struct ip *);
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, length, 0);

		/* Send to ip_output */
		memset(&ro, 0, sizeof(ro));

		flags = (IP_OUTARGS |   /* Passing out args to specify interface */
		    IP_NOIPSEC);        /* To ensure the packet doesn't go through ipsec twice */

		memset(&ipoa, 0, sizeof(ipoa));
		ipoa.ipoa_flowadv.code = 0;
		ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			/* Honor the outer interface chosen by the SA. */
			ipoa.ipoa_boundif = ipsec_state.outgoing_if;
			ipoa.ipoa_flags |= IPOAF_BOUND_IF;
		}
		ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);

		adv = &ipoa.ipoa_flowadv;

		(void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
		data = NULL;

		/* Throttle ourselves if the lower layer signals congestion. */
		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	case 6: {
#if IPSEC_NEXUS
		/* In netif mode bpf tapping happens in the nexus path instead. */
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET6;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Ensure the IPv6 header sits in its own mbuf for the transform. */
		data = ipsec6_splithdr(data);
		if (data == NULL) {
			printf("ipsec_output: ipsec6_splithdr returned NULL\n");
			goto ipsec_output_err;
		}

		ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) {        /* tunneled in IPv4 - packet is gone */
			goto done;
		}
		/* The transform may have replaced the mbuf chain. */
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				printf("ipsec_output: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
		data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, length, 0);

		/* Send to ip6_output */
		memset(&ro6, 0, sizeof(ro6));

		flags = IPV6_OUTARGS;

		memset(&ip6oa, 0, sizeof(ip6oa));
		ip6oa.ip6oa_flowadv.code = 0;
		ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			/* Honor the outer interface chosen by the SA. */
			ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
			ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
		}
		ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);

		adv = &ip6oa.ip6oa_flowadv;

		(void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
		data = NULL;

		/* Throttle ourselves if the lower layer signals congestion. */
		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	default: {
		printf("ipsec_output: Received unknown packet version %d.\n", ip_version);
		error = EINVAL;
		goto ipsec_output_err;
	}
	}

done:
	return error;

ipsec_output_err:
	/* We still own the mbuf on error paths; free it. */
	if (data) {
		mbuf_freem(data);
	}
	goto done;
}
3250
3251 static void
3252 ipsec_start(ifnet_t interface)
3253 {
3254 mbuf_t data;
3255 struct ipsec_pcb *pcb = ifnet_softc(interface);
3256
3257 VERIFY(pcb != NULL);
3258 for (;;) {
3259 if (ifnet_dequeue(interface, &data) != 0) {
3260 break;
3261 }
3262 if (ipsec_output(interface, data) != 0) {
3263 break;
3264 }
3265 }
3266 }
3267
3268 /* Network Interface functions */
3269 static errno_t
3270 ipsec_demux(__unused ifnet_t interface,
3271 mbuf_t data,
3272 __unused char *frame_header,
3273 protocol_family_t *protocol)
3274 {
3275 struct ip *ip;
3276 u_int ip_version;
3277
3278 while (data != NULL && mbuf_len(data) < 1) {
3279 data = mbuf_next(data);
3280 }
3281
3282 if (data == NULL) {
3283 return ENOENT;
3284 }
3285
3286 ip = mtod(data, struct ip *);
3287 ip_version = ip->ip_v;
3288
3289 switch (ip_version) {
3290 case 4:
3291 *protocol = PF_INET;
3292 return 0;
3293 case 6:
3294 *protocol = PF_INET6;
3295 return 0;
3296 default:
3297 break;
3298 }
3299
3300 return 0;
3301 }
3302
3303 static errno_t
3304 ipsec_add_proto(__unused ifnet_t interface,
3305 protocol_family_t protocol,
3306 __unused const struct ifnet_demux_desc *demux_array,
3307 __unused u_int32_t demux_count)
3308 {
3309 switch (protocol) {
3310 case PF_INET:
3311 return 0;
3312 case PF_INET6:
3313 return 0;
3314 default:
3315 break;
3316 }
3317
3318 return ENOPROTOOPT;
3319 }
3320
3321 static errno_t
3322 ipsec_del_proto(__unused ifnet_t interface,
3323 __unused protocol_family_t protocol)
3324 {
3325 return 0;
3326 }
3327
/*
 * ioctl handler for the ipsec interface.
 *
 * Handles SIOCSIFMTU (set MTU) and SIOCSIFFLAGS (no work here;
 * ifioctl() performs the flag update).  All other commands return
 * EOPNOTSUPP.
 */
static errno_t
ipsec_ioctl(ifnet_t interface,
			u_long command,
			void *data)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);
#endif
	errno_t result = 0;
	
	switch (command) {
		case SIOCSIFMTU: {
#if IPSEC_NEXUS
			if (pcb->ipsec_use_netif) {
				// Make sure we can fit packets in the channel buffers
				// (a netif channel slot must hold a whole packet, so the
				// MTU may not exceed the configured slot size)
				if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
					result = EINVAL;
				} else {
					ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
				}
			} else
#endif // IPSEC_NEXUS
			{
				/* Legacy (non-netif) path: MTU is applied as-is.
				 * NOTE(review): ifr_mtu is a signed int and is not
				 * range-checked here — presumably validated by the
				 * caller; confirm. */
				ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
			}
			break;
		}
			
		case SIOCSIFFLAGS:
			/* ifioctl() takes care of it */
			break;
			
		default:
			result = EOPNOTSUPP;
	}
	
	return result;
}
3366
3367 static void
3368 ipsec_detached(ifnet_t interface)
3369 {
3370 struct ipsec_pcb *pcb = ifnet_softc(interface);
3371 (void)ifnet_release(interface);
3372 ipsec_free_pcb(pcb, true);
3373 }
3374
3375 /* Protocol Handlers */
3376
/*
 * Protocol input handler: hand an inbound packet up to the IP stack.
 *
 * Sets the receive interface on the packet, taps it for bpf/pktap on
 * the non-netif path, then delivers it via proto_input().  Interface
 * input statistics are updated on both the success and failure paths.
 * Always returns 0; on proto_input() failure the packet is freed here.
 */
static errno_t
ipsec_proto_input(ifnet_t interface,
				  protocol_family_t protocol,
				  mbuf_t m,
				  __unused char *frame_header)
{
	mbuf_pkthdr_setrcvif(m, interface);

#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	/* On the netif (nexus) path, taps are handled elsewhere. */
	if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
	{
		/* Classify by IP version nibble so the DLT_NULL tap gets the
		 * right address family; af stays 0 for unknown versions. */
		uint32_t af = 0;
		struct ip *ip = mtod(m, struct ip *);
		if (ip->ip_v == 4) {
			af = AF_INET;
		} else if (ip->ip_v == 6) {
			af = AF_INET6;
		}
		bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
		pktap_input(interface, protocol, m, NULL);
	}

	/* Capture the length before proto_input() consumes the mbuf. */
	int32_t pktlen = m->m_pkthdr.len;
	if (proto_input(protocol, m) != 0) {
		ifnet_stat_increment_in(interface, 0, 0, 1);
		m_freem(m);
	} else {
		ifnet_stat_increment_in(interface, 1, pktlen, 0);
	}

	return 0;
}
3411
3412 static errno_t
3413 ipsec_proto_pre_output(__unused ifnet_t interface,
3414 protocol_family_t protocol,
3415 __unused mbuf_t *packet,
3416 __unused const struct sockaddr *dest,
3417 __unused void *route,
3418 __unused char *frame_type,
3419 __unused char *link_layer_dest)
3420 {
3421 *(protocol_family_t *)(void *)frame_type = protocol;
3422 return 0;
3423 }
3424
3425 static errno_t
3426 ipsec_attach_proto(ifnet_t interface,
3427 protocol_family_t protocol)
3428 {
3429 struct ifnet_attach_proto_param proto;
3430 errno_t result;
3431
3432 bzero(&proto, sizeof(proto));
3433 proto.input = ipsec_proto_input;
3434 proto.pre_output = ipsec_proto_pre_output;
3435
3436 result = ifnet_attach_protocol(interface, protocol, &proto);
3437 if (result != 0 && result != EEXIST) {
3438 printf("ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
3439 protocol, result);
3440 }
3441
3442 return result;
3443 }
3444
/*
 * Inject a packet (or packet chain, linked via m_nextpkt) into the
 * interface's inbound path, as if it had arrived from the network.
 *
 * On the netif (nexus) path the chain is appended to the PCB's input
 * chain under ipsec_input_chain_lock and the RX ring is notified so a
 * sync will pick it up.  On the legacy path the packet is classified
 * with ipsec_demux() and delivered directly via ipsec_proto_input().
 *
 * Returns 0 on success, or the error from ipsec_demux() on the legacy
 * path (e.g. ENOENT for an empty chain).
 */
errno_t
ipsec_inject_inbound_packet(ifnet_t interface,
							mbuf_t packet)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	if (pcb->ipsec_use_netif) {
		/* Shared PCB lock taken first, then the chain mutex —
		 * NOTE(review): presumably the established lock order for this
		 * PCB; confirm against the other chain manipulation sites. */
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		lck_mtx_lock(&pcb->ipsec_input_chain_lock);
		/* Append the new chain to the tail of the pending input chain. */
		if (pcb->ipsec_input_chain != NULL) {
			pcb->ipsec_input_chain_last->m_nextpkt = packet;
		} else {
			pcb->ipsec_input_chain = packet;
		}
		/* Walk to the end of the injected chain to find the new tail;
		 * VERIFY guards against a self-referencing (cyclic) chain. */
		while (packet->m_nextpkt) {
			VERIFY(packet != packet->m_nextpkt);
			packet = packet->m_nextpkt;
		}
		pcb->ipsec_input_chain_last = packet;
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		/* Snapshot the ring pointer while still holding the PCB lock,
		 * then notify outside the lock. */
		kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring;
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		return 0;
	} else
#endif // IPSEC_NEXUS
	{
		errno_t error;
		protocol_family_t protocol;
		if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
			return error;
		}

		return ipsec_proto_input(interface, protocol, packet, NULL);
	}
}
3488
3489 void
3490 ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family)
3491 {
3492 if (packet != NULL && interface != NULL) {
3493 struct ipsec_pcb *pcb = ifnet_softc(interface);
3494 if (pcb != NULL) {
3495 /* Set traffic class, set flow */
3496 m_set_service_class(packet, pcb->ipsec_output_service_class);
3497 packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
3498 packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
3499 if (family == AF_INET) {
3500 struct ip *ip = mtod(packet, struct ip *);
3501 packet->m_pkthdr.pkt_proto = ip->ip_p;
3502 } else if (family == AF_INET6) {
3503 struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
3504 packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
3505 }
3506 packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
3507 }
3508 }
3509 }
3510
3511 void
3512 ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
3513 {
3514 struct ipsec_pcb *pcb;
3515
3516 if (interface == NULL || ipoa == NULL) {
3517 return;
3518 }
3519 pcb = ifnet_softc(interface);
3520
3521 if (net_qos_policy_restricted == 0) {
3522 ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
3523 ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
3524 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
3525 net_qos_policy_restrict_avapps != 0) {
3526 ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
3527 } else {
3528 ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED;
3529 ipoa->ipoa_sotc = SO_TC_VO;
3530 }
3531 }
3532
3533 void
3534 ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
3535 {
3536 struct ipsec_pcb *pcb;
3537
3538 if (interface == NULL || ip6oa == NULL) {
3539 return;
3540 }
3541 pcb = ifnet_softc(interface);
3542
3543 if (net_qos_policy_restricted == 0) {
3544 ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED;
3545 ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
3546 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
3547 net_qos_policy_restrict_avapps != 0) {
3548 ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
3549 } else {
3550 ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
3551 ip6oa->ip6oa_sotc = SO_TC_VO;
3552 }
3553 }