]> git.saurik.com Git - apple/xnu.git/blob - bsd/net/if_ipsec.c
xnu-4570.41.2.tar.gz
[apple/xnu.git] / bsd / net / if_ipsec.c
1 /*
2 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/systm.h>
30 #include <sys/kern_control.h>
31 #include <net/kpi_protocol.h>
32 #include <net/kpi_interface.h>
33 #include <sys/socket.h>
34 #include <sys/socketvar.h>
35 #include <net/if.h>
36 #include <net/if_types.h>
37 #include <net/bpf.h>
38 #include <net/if_ipsec.h>
39 #include <sys/mbuf.h>
40 #include <sys/sockio.h>
41 #include <netinet/in.h>
42 #include <netinet/ip6.h>
43 #include <netinet6/in6_var.h>
44 #include <netinet6/ip6_var.h>
45 #include <sys/kauth.h>
46 #include <netinet6/ipsec.h>
47 #include <netinet6/ipsec6.h>
48 #include <netinet6/esp.h>
49 #include <netinet6/esp6.h>
50 #include <netinet/ip.h>
51 #include <net/flowadv.h>
52 #include <net/necp.h>
53 #include <netkey/key.h>
54 #include <net/pktap.h>
55 #include <kern/zalloc.h>
56
57 #define IPSEC_NEXUS 0
58
59 extern int net_qos_policy_restricted;
60 extern int net_qos_policy_restrict_avapps;
61 extern unsigned int if_enable_netagent;
62
63 /* Kernel Control functions */
64 static errno_t ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
65 void **unitinfo);
66 static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
67 void **unitinfo);
68 static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
69 void *unitinfo);
70 static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
71 void *unitinfo, mbuf_t m, int flags);
72 static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
73 int opt, void *data, size_t *len);
74 static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
75 int opt, void *data, size_t len);
76
77 /* Network Interface functions */
78 static void ipsec_start(ifnet_t interface);
79 static errno_t ipsec_output(ifnet_t interface, mbuf_t data);
80 static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
81 protocol_family_t *protocol);
82 static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
83 const struct ifnet_demux_desc *demux_array,
84 u_int32_t demux_count);
85 static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
86 static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
87 static void ipsec_detached(ifnet_t interface);
88
89 /* Protocol handlers */
90 static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
91 static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
92 mbuf_t m, char *frame_header);
93 static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
94 mbuf_t *packet, const struct sockaddr *dest, void *route,
95 char *frame_type, char *link_layer_dest);
96
97 static kern_ctl_ref ipsec_kctlref;
98 static u_int32_t ipsec_family;
99 static lck_attr_t *ipsec_lck_attr;
100 static lck_grp_attr_t *ipsec_lck_grp_attr;
101 static lck_grp_t *ipsec_lck_grp;
102 static lck_mtx_t ipsec_lock;
103
104 #if IPSEC_NEXUS
105
106 SYSCTL_DECL(_net_ipsec);
107 SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
108 static int if_ipsec_verify_interface_creation = 0;
109 SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");
110
111 #define IPSEC_IF_VERIFY(_e) if (unlikely(if_ipsec_verify_interface_creation)) { VERIFY(_e); }
112
113 #define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
114 #define IPSEC_IF_DEFAULT_RING_SIZE 64
115 #define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
116 #define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
117
118 #define IPSEC_IF_MIN_RING_SIZE 16
119 #define IPSEC_IF_MAX_RING_SIZE 1024
120
121 #define IPSEC_IF_MIN_SLOT_SIZE 1024
122 #define IPSEC_IF_MAX_SLOT_SIZE 4096
123
124 static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
125 static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
126 static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
127
128 static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
129 static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
130 static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
131
132 SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
133 &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
134 SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
135 &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
136 SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
137 &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");
138
139 static errno_t
140 ipsec_register_nexus(void);
141
/*
 * Per-PCB collection of nexus identifiers.  "if_*" name the netif nexus
 * provider/instance backing the interface; the "ms_*" fields track the
 * flow-switch side (provider, instance, device/host ports, and the
 * registered netagent).  NOTE(review): "ms" presumably stands for
 * "multistack" — confirm against the nexus setup code elsewhere in
 * this file.
 */
typedef struct ipsec_nx {
	uuid_t if_provider;	// netif nexus provider
	uuid_t if_instance;	// netif nexus instance
	uuid_t ms_provider;	// flow-switch nexus provider
	uuid_t ms_instance;	// flow-switch nexus instance
	uuid_t ms_device;	// device port on the flow-switch nexus
	uuid_t ms_host;		// host port on the flow-switch nexus
	uuid_t ms_agent;	// netagent registered for this nexus
} *ipsec_nx_t;
151
152 static nexus_controller_t ipsec_ncd;
153 static int ipsec_ncd_refcount;
154 static uuid_t ipsec_kpipe_uuid;
155
156 #endif // IPSEC_NEXUS
157
/* Control block allocated for each kernel control connection */
struct ipsec_pcb {
	TAILQ_ENTRY(ipsec_pcb) ipsec_chain;	// Linkage on the global ipsec_head list
	kern_ctl_ref ipsec_ctlref;		// Kernel control reference for this connection
	ifnet_t ipsec_ifp;			// Backing network interface
	u_int32_t ipsec_unit;			// Control unit number; 0 once the control disconnects
	u_int32_t ipsec_unique_id;
	u_int32_t ipsec_flags;
	u_int32_t ipsec_input_frag_size;	// Input fragment size (effective only when set below)
	bool ipsec_frag_size_set;		// Whether ipsec_input_frag_size is in effect
	int ipsec_ext_ifdata_stats;		// Nonzero: client maintains ifnet data stats externally
	mbuf_svc_class_t ipsec_output_service_class;	// Service class applied to output packets
	char ipsec_if_xname[IFXNAMSIZ];		// External interface name
	char ipsec_unique_name[IFXNAMSIZ];
	// PCB lock protects state fields, like ipsec_kpipe_enabled
	decl_lck_rw_data(, ipsec_pcb_lock);

#if IPSEC_NEXUS
	lck_mtx_t ipsec_input_chain_lock;	// Protects the input mbuf chain below
	struct mbuf * ipsec_input_chain;	// Head of pending input mbufs
	struct mbuf * ipsec_input_chain_last;	// Tail, for O(1) append
	// Input chain lock protects the list of input mbufs
	// The input chain lock must be taken AFTER the PCB lock if both are held
	struct ipsec_nx ipsec_nx;		// Nexus/port/agent UUID state
	int ipsec_kpipe_enabled;		// Nonzero while the kernel-pipe channel is active
	uuid_t ipsec_kpipe_uuid;
	void * ipsec_kpipe_rxring;		// Cached kpipe RX ring (set in ring_init)
	void * ipsec_kpipe_txring;		// Cached kpipe TX ring (set in ring_init)

	kern_nexus_t ipsec_netif_nexus;		// Owning netif nexus (set in ipsec_netif_prepare)
	void * ipsec_netif_rxring;		// Cached netif RX ring
	void * ipsec_netif_txring;		// Cached netif TX ring
	uint64_t ipsec_netif_txring_size;

	u_int32_t ipsec_slot_size;		// Per-slot buffer size for nexus rings
	u_int32_t ipsec_netif_ring_size;
	u_int32_t ipsec_tx_fsw_ring_size;	// Flow-switch TX ring size
	u_int32_t ipsec_rx_fsw_ring_size;	// Flow-switch RX ring size
	bool ipsec_use_netif;			// True when packets flow through the netif nexus

#endif // IPSEC_NEXUS
};
200
201 TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;
202
203 #define IPSEC_PCB_ZONE_MAX 32
204 #define IPSEC_PCB_ZONE_NAME "net.if_ipsec"
205
206 static unsigned int ipsec_pcb_size; /* size of zone element */
207 static struct zone *ipsec_pcb_zone; /* zone for ipsec_pcb */
208
209 #define IPSECQ_MAXLEN 256
210
211 #if IPSEC_NEXUS
212 static int
213 sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
214 {
215 #pragma unused(arg1, arg2)
216 int value = if_ipsec_ring_size;
217
218 int error = sysctl_handle_int(oidp, &value, 0, req);
219 if (error || !req->newptr) {
220 return (error);
221 }
222
223 if (value < IPSEC_IF_MIN_RING_SIZE ||
224 value > IPSEC_IF_MAX_RING_SIZE) {
225 return (EINVAL);
226 }
227
228 if_ipsec_ring_size = value;
229
230 return (0);
231 }
232
233 static int
234 sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
235 {
236 #pragma unused(arg1, arg2)
237 int value = if_ipsec_tx_fsw_ring_size;
238
239 int error = sysctl_handle_int(oidp, &value, 0, req);
240 if (error || !req->newptr) {
241 return (error);
242 }
243
244 if (value < IPSEC_IF_MIN_RING_SIZE ||
245 value > IPSEC_IF_MAX_RING_SIZE) {
246 return (EINVAL);
247 }
248
249 if_ipsec_tx_fsw_ring_size = value;
250
251 return (0);
252 }
253
254 static int
255 sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
256 {
257 #pragma unused(arg1, arg2)
258 int value = if_ipsec_rx_fsw_ring_size;
259
260 int error = sysctl_handle_int(oidp, &value, 0, req);
261 if (error || !req->newptr) {
262 return (error);
263 }
264
265 if (value < IPSEC_IF_MIN_RING_SIZE ||
266 value > IPSEC_IF_MAX_RING_SIZE) {
267 return (EINVAL);
268 }
269
270 if_ipsec_rx_fsw_ring_size = value;
271
272 return (0);
273 }
274 #endif // IPSEC_NEXUS
275
/*
 * One-time registration of the ipsec subsystem: interface family tag,
 * PCB zone, (optionally) the nexus domain provider, the kernel control,
 * the PF_INET/PF_INET6 protocol plumbers, and the global lock.
 * Returns 0 on success or an errno.  On failure, later-stage
 * registrations are unwound; the zone is not torn down.
 */
errno_t
ipsec_register_control(void)
{
	struct kern_ctl_reg kern_ctl;
	errno_t result = 0;

	/* Find a unique value for our interface family */
	result = mbuf_tag_id_find(IPSEC_CONTROL_NAME, &ipsec_family);
	if (result != 0) {
		printf("ipsec_register_control - mbuf_tag_id_find_internal failed: %d\n", result);
		return result;
	}

	/* Zone for per-connection control blocks. */
	ipsec_pcb_size = sizeof(struct ipsec_pcb);
	ipsec_pcb_zone = zinit(ipsec_pcb_size,
	    IPSEC_PCB_ZONE_MAX * ipsec_pcb_size,
	    0, IPSEC_PCB_ZONE_NAME);
	if (ipsec_pcb_zone == NULL) {
		printf("ipsec_register_control - zinit(ipsec_pcb) failed");
		return ENOMEM;
	}

#if IPSEC_NEXUS
	// NOTE(review): return value ignored — nexus registration failure is
	// tolerated here; confirm that later nexus use handles that case.
	ipsec_register_nexus();
#endif // IPSEC_NEXUS

	TAILQ_INIT(&ipsec_head);

	/* Describe and register the ipsec kernel control. */
	bzero(&kern_ctl, sizeof(kern_ctl));
	strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
	kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; /* Require root */
	kern_ctl.ctl_sendsize = 64 * 1024;
	kern_ctl.ctl_recvsize = 64 * 1024;
	kern_ctl.ctl_bind = ipsec_ctl_bind;
	kern_ctl.ctl_connect = ipsec_ctl_connect;
	kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
	kern_ctl.ctl_send = ipsec_ctl_send;
	kern_ctl.ctl_setopt = ipsec_ctl_setopt;
	kern_ctl.ctl_getopt = ipsec_ctl_getopt;

	result = ctl_register(&kern_ctl, &ipsec_kctlref);
	if (result != 0) {
		printf("ipsec_register_control - ctl_register failed: %d\n", result);
		return result;
	}

	/* Register the protocol plumbers */
	if ((result = proto_register_plumber(PF_INET, ipsec_family,
	    ipsec_attach_proto, NULL)) != 0) {
		printf("ipsec_register_control - proto_register_plumber(PF_INET, %d) failed: %d\n",
		    ipsec_family, result);
		ctl_deregister(ipsec_kctlref);
		return result;
	}

	/* Register the protocol plumbers */
	if ((result = proto_register_plumber(PF_INET6, ipsec_family,
	    ipsec_attach_proto, NULL)) != 0) {
		/* Unwind the PF_INET plumber and the kernel control. */
		proto_unregister_plumber(PF_INET, ipsec_family);
		ctl_deregister(ipsec_kctlref);
		printf("ipsec_register_control - proto_register_plumber(PF_INET6, %d) failed: %d\n",
		    ipsec_family, result);
		return result;
	}

	/* Set up the global lock last; everything above succeeded. */
	ipsec_lck_attr = lck_attr_alloc_init();
	ipsec_lck_grp_attr = lck_grp_attr_alloc_init();
	ipsec_lck_grp = lck_grp_alloc_init("ipsec", ipsec_lck_grp_attr);
	lck_mtx_init(&ipsec_lock, ipsec_lck_grp, ipsec_lck_attr);

	return 0;
}
349
350 /* Helpers */
351 int
352 ipsec_interface_isvalid (ifnet_t interface)
353 {
354 struct ipsec_pcb *pcb = NULL;
355
356 if (interface == NULL)
357 return 0;
358
359 pcb = ifnet_softc(interface);
360
361 if (pcb == NULL)
362 return 0;
363
364 /* When ctl disconnects, ipsec_unit is set to 0 */
365 if (pcb->ipsec_unit == 0)
366 return 0;
367
368 return 1;
369 }
370
371 static errno_t
372 ipsec_ifnet_set_attrs(ifnet_t ifp)
373 {
374 /* Set flags and additional information. */
375 ifnet_set_mtu(ifp, 1500);
376 ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
377
378 /* The interface must generate its own IPv6 LinkLocal address,
379 * if possible following the recommendation of RFC2472 to the 64bit interface ID
380 */
381 ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
382
383 #if !IPSEC_NEXUS
384 /* Reset the stats in case as the interface may have been recycled */
385 struct ifnet_stats_param stats;
386 bzero(&stats, sizeof(struct ifnet_stats_param));
387 ifnet_set_stat(ifp, &stats);
388 #endif // !IPSEC_NEXUS
389
390 return (0);
391 }
392
393 #if IPSEC_NEXUS
394
395 static uuid_t ipsec_nx_dom_prov;
396
397 static errno_t
398 ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
399 {
400 return 0;
401 }
402
403 static void
404 ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
405 {
406 // Ignore
407 }
408
409 static errno_t
410 ipsec_register_nexus(void)
411 {
412 const struct kern_nexus_domain_provider_init dp_init = {
413 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
414 .nxdpi_flags = 0,
415 .nxdpi_init = ipsec_nxdp_init,
416 .nxdpi_fini = ipsec_nxdp_fini
417 };
418 errno_t err = 0;
419
420 /* ipsec_nxdp_init() is called before this function returns */
421 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
422 (const uint8_t *) "com.apple.ipsec",
423 &dp_init, sizeof(dp_init),
424 &ipsec_nx_dom_prov);
425 if (err != 0) {
426 printf("%s: failed to register domain provider\n", __func__);
427 return (err);
428 }
429 return (0);
430 }
431
432 static errno_t
433 ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
434 {
435 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
436 pcb->ipsec_netif_nexus = nexus;
437 return (ipsec_ifnet_set_attrs(ifp));
438 }
439
440 static errno_t
441 ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
442 proc_t p, kern_nexus_t nexus,
443 nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
444 {
445 #pragma unused(nxprov, p)
446 #pragma unused(nexus, nexus_port, channel, ch_ctx)
447 return (0);
448 }
449
450 static errno_t
451 ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
452 kern_channel_t channel)
453 {
454 #pragma unused(nxprov, channel)
455 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
456 boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
457 return (ok ? 0 : ENXIO);
458 }
459
460 static void
461 ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
462 kern_channel_t channel)
463 {
464 #pragma unused(nxprov, nexus, channel)
465 }
466
467 static void
468 ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
469 kern_channel_t channel)
470 {
471 #pragma unused(nxprov, nexus, channel)
472 }
473
474 static void
475 ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
476 kern_channel_t channel)
477 {
478 #pragma unused(nxprov, channel)
479 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
480 if (pcb->ipsec_netif_nexus == nexus) {
481 pcb->ipsec_netif_nexus = NULL;
482 }
483 ifnet_decr_iorefcnt(pcb->ipsec_ifp);
484 }
485
486 static errno_t
487 ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
488 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
489 void **ring_ctx)
490 {
491 #pragma unused(nxprov)
492 #pragma unused(channel)
493 #pragma unused(ring_ctx)
494 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
495 if (!is_tx_ring) {
496 VERIFY(pcb->ipsec_kpipe_rxring == NULL);
497 pcb->ipsec_kpipe_rxring = ring;
498 } else {
499 VERIFY(pcb->ipsec_kpipe_txring == NULL);
500 pcb->ipsec_kpipe_txring = ring;
501 }
502 return 0;
503 }
504
505 static void
506 ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
507 kern_channel_ring_t ring)
508 {
509 #pragma unused(nxprov)
510 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
511 if (pcb->ipsec_kpipe_rxring == ring) {
512 pcb->ipsec_kpipe_rxring = NULL;
513 } else if (pcb->ipsec_kpipe_txring == ring) {
514 pcb->ipsec_kpipe_txring = NULL;
515 }
516 }
517
518 static errno_t
519 ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
520 kern_channel_ring_t tx_ring, uint32_t flags)
521 {
522 #pragma unused(nxprov)
523 #pragma unused(flags)
524 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
525
526 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
527 int channel_enabled = pcb->ipsec_kpipe_enabled;
528 if (!channel_enabled) {
529 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
530 return 0;
531 }
532
533 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
534 if (tx_slot == NULL) {
535 // Nothing to write, bail
536 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
537 return 0;
538 }
539
540 // Signal the netif ring to read
541 kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring;
542 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
543
544 if (rx_ring != NULL) {
545 kern_channel_notify(rx_ring, 0);
546 }
547 return 0;
548 }
549
/*
 * Encrypt an outbound mbuf for the given ipsec interface.
 * Dispatches on the IP version of the leading header and runs the
 * packet through ipsec4/ipsec6_interface_output.  Returns the
 * encrypted mbuf, or NULL if the packet was consumed (tunneled into
 * the other address family) or dropped on error; on error the input
 * mbuf is freed here.
 * NOTE(review): assumes the first mbuf is contiguous through the IP
 * header (mtod without a pullup) — confirm callers guarantee this.
 */
static mbuf_t
ipsec_encrypt_mbuf(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_output_state ipsec_state;
	int error = 0;
	uint32_t af;

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = -1;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	struct ip *ip = mtod(data, struct ip *);
	u_int ip_version = ip->ip_v;

	switch (ip_version) {
		case 4: {
			af = AF_INET;

			memset(&ipsec_state, 0, sizeof(ipsec_state));
			ipsec_state.m = data;
			ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
			memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

			error = ipsec4_interface_output(&ipsec_state, interface);
			if (error == 0 && ipsec_state.tunneled == 6) {
				// Tunneled in IPv6 - packet is gone
				// TODO: Don't lose mbuf
				data = NULL;
				goto done;
			}

			// On success the (possibly replaced) mbuf is in ipsec_state.m
			data = ipsec_state.m;
			if (error || data == NULL) {
				if (error) {
					printf("ipsec_encrypt_mbuf: ipsec4_output error %d\n", error);
				}
				goto ipsec_output_err;
			}
			goto done;
		}
		case 6: {
			af = AF_INET6;

			// Split the IPv6 header into its own mbuf before encrypting
			data = ipsec6_splithdr(data);
			if (data == NULL) {
				printf("ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
				goto ipsec_output_err;
			}

			struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

			memset(&ipsec_state, 0, sizeof(ipsec_state));
			ipsec_state.m = data;
			ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
			memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

			error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
			if (error == 0 && ipsec_state.tunneled == 4) {
				// Tunneled in IPv4 - packet is gone
				// TODO: Don't lose mbuf
				data = NULL;
				goto done;
			}
			data = ipsec_state.m;
			if (error || data == NULL) {
				if (error) {
					printf("ipsec_encrypt_mbuf: ipsec6_output error %d\n", error);
				}
				goto ipsec_output_err;
			}
			goto done;
		}
		default: {
			printf("ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
			error = -1;
			goto ipsec_output_err;
		}
	}

done:
	return data;

ipsec_output_err:
	// Drop the packet on any error path
	if (data) {
		mbuf_freem(data);
	}
	return NULL;
}
644
/*
 * Kernel-pipe RX sync: drain outbound packets from the netif TX ring,
 * encrypt each via ipsec_encrypt_mbuf(), and copy the resulting ESP
 * packets into freshly allocated slots of the kpipe RX ring for the
 * user-space client to read.
 *
 * Lock/ring ordering is delicate here: the PCB lock is dropped before
 * kr_enter(tx_ring) and re-taken afterwards, then the cached tx_ring
 * is re-validated in case it was torn down in the window.  On every
 * exit path the PCB lock is released before kr_exit().
 */
static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	struct kern_channel_ring_stat_increment rx_ring_stats;

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	int channel_enabled = pcb->ipsec_kpipe_enabled;
	if (!channel_enabled) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		return 0;
	}

	// Reclaim user-released slots
	(void) kern_channel_reclaim(rx_ring);

	// No room to deliver anything: bail early
	uint32_t avail = kern_channel_available_slot_count(rx_ring);
	if (avail == 0) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		return 0;
	}

	kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring;
	if (tx_ring == NULL) {
		// Net-If TX ring not set up yet, nothing to read
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		return 0;
	}

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;

	// Unlock ipsec before entering ring
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	(void)kr_enter(tx_ring, TRUE);

	// Lock again after entering and validate
	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	if (tx_ring != pcb->ipsec_netif_txring) {
		// Ring no longer valid
		// Unlock first, then exit ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(tx_ring);
		return 0;
	}


	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	if (tx_slot == NULL) {
		// Nothing to read, don't bother signalling
		// Unlock first, then exit ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(tx_ring);
		return 0;
	}

	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
	VERIFY(rx_pp != NULL);
	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
	kern_channel_slot_t rx_pslot = NULL;
	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	// Process one TX slot per iteration while both rings have slots
	while (rx_slot != NULL && tx_slot != NULL) {
		size_t length = 0;
		mbuf_t data = NULL;
		errno_t error = 0;

		// Allocate rx packet
		kern_packet_t rx_ph = 0;
		error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
		if (unlikely(error != 0)) {
			printf("ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
				pcb->ipsec_ifp->if_xname);
			break;
		}

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		if (tx_ph == 0) {
			// NOTE(review): rx_ph allocated above is not freed on this
			// path — looks like a packet-handle leak; verify.
			continue;
		}

		// Locate the TX payload: buflet base + data offset
		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);
		uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
		VERIFY(tx_baddr != NULL);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		// Clamp to the configured slot size
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		// Increment TX stats
		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;

		if (length > 0) {
			// Copy the payload into an mbuf so the crypto path can use it
			error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					// Encrypt and send packet
					data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
				} else {
					printf("ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROPPED);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				printf("ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROPPED);
			}
		} else {
			printf("ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROPPED);
		}

		if (data == NULL) {
			// Nothing survived encryption; return the RX packet and stop
			printf("ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
			kern_pbufpool_free(rx_pp, rx_ph);
			break;
		}

		length = mbuf_pkthdr_len(data);
		if (length > rx_pp->pp_buflet_size) {
			// Encrypted packet no longer fits a single buflet: drop it
			// Flush data
			mbuf_freem(data);
			kern_pbufpool_free(rx_pp, rx_ph);
			printf("ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
				pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
			continue;
		}

		// Fillout rx packet
		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
		VERIFY(rx_buf != NULL);
		void *rx_baddr = kern_buflet_get_object_address(rx_buf);
		VERIFY(rx_baddr != NULL);

		// Copy-in data from mbuf to buflet
		mbuf_copydata(data, 0, length, (void *)rx_baddr);
		kern_packet_clear_flow_uuid(rx_ph);	// Zero flow id

		// Finalize and attach the packet
		error = kern_buflet_set_data_offset(rx_buf, 0);
		VERIFY(error == 0);
		error = kern_buflet_set_data_length(rx_buf, length);
		VERIFY(error == 0);
		error = kern_packet_finalize(rx_ph);
		VERIFY(error == 0);
		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
		VERIFY(error == 0);

		STATS_INC(nifs, NETIF_STATS_TXPKTS);
		STATS_INC(nifs, NETIF_STATS_TXCOPY_DIRECT);

		rx_ring_stats.kcrsi_slots_transferred++;
		rx_ring_stats.kcrsi_bytes_transferred += length;

		if (!pcb->ipsec_ext_ifdata_stats) {
			ifnet_stat_increment_out(pcb->ipsec_ifp, 1, length, 0);
		}

		// The payload was copied out; the mbuf is no longer needed
		mbuf_freem(data);

		rx_pslot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	// Publish RX progress and stats
	if (rx_pslot) {
		kern_channel_advance_slot(rx_ring, rx_pslot);
		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
	}

	// Publish TX progress, stats, and reclaim consumed TX slots
	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	/* always reenable output */
	errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
	if (error != 0) {
		printf("ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
	}

	// Unlock first, then exit ring
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	if (tx_pslot != NULL) {
		kern_channel_notify(tx_ring, 0);
	}
	kr_exit(tx_ring);

	return 0;
}
857
858 static errno_t
859 ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
860 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
861 void **ring_ctx)
862 {
863 #pragma unused(nxprov)
864 #pragma unused(channel)
865 #pragma unused(ring_ctx)
866 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
867 if (!is_tx_ring) {
868 VERIFY(pcb->ipsec_netif_rxring == NULL);
869 pcb->ipsec_netif_rxring = ring;
870 } else {
871 VERIFY(pcb->ipsec_netif_txring == NULL);
872 pcb->ipsec_netif_txring = ring;
873 }
874 return 0;
875 }
876
877 static void
878 ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
879 kern_channel_ring_t ring)
880 {
881 #pragma unused(nxprov)
882 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
883 if (pcb->ipsec_netif_rxring == ring) {
884 pcb->ipsec_netif_rxring = NULL;
885 } else if (pcb->ipsec_netif_txring == ring) {
886 pcb->ipsec_netif_txring = NULL;
887 }
888 }
889
890 static bool
891 ipsec_netif_check_policy(mbuf_t data)
892 {
893 necp_kernel_policy_result necp_result = 0;
894 necp_kernel_policy_result_parameter necp_result_parameter = {};
895 uint32_t necp_matched_policy_id = 0;
896
897 // This packet has been marked with IP level policy, do not mark again.
898 if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
899 return (true);
900 }
901
902 size_t length = mbuf_pkthdr_len(data);
903 if (length < sizeof(struct ip)) {
904 return (false);
905 }
906
907 struct ip *ip = mtod(data, struct ip *);
908 u_int ip_version = ip->ip_v;
909 switch (ip_version) {
910 case 4: {
911 necp_matched_policy_id = necp_ip_output_find_policy_match(data, 0, NULL,
912 &necp_result, &necp_result_parameter);
913 break;
914 }
915 case 6: {
916 necp_matched_policy_id = necp_ip6_output_find_policy_match(data, 0, NULL,
917 &necp_result, &necp_result_parameter);
918 break;
919 }
920 default: {
921 return (false);
922 }
923 }
924
925 if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
926 necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
927 /* Drop and flow divert packets should be blocked at the IP layer */
928 return (false);
929 }
930
931 necp_mark_packet_from_ip(data, necp_matched_policy_id);
932 return (true);
933 }
934
/*
 * Net-If TX sync: drain packets the stack placed on the netif TX ring.
 * When the kernel pipe is enabled, delivery is delegated by notifying
 * the kpipe RX ring.  Otherwise each packet is copied into an mbuf,
 * checked against NECP policy, and injected into the BSD stack via
 * ipsec_output().  Always returns 0.
 */
static errno_t
ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);

	STATS_INC(nifs, NETIF_STATS_TXSYNC);

	if (tx_slot == NULL) {
		// Nothing to write, don't bother signalling
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		return 0;
	}

	if (pcb->ipsec_kpipe_enabled) {
		// Kernel pipe mode: the kpipe RX sync does the work; just notify
		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring;
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}
		return 0;
	}

	// If we're here, we're injecting into the BSD stack
	while (tx_slot != NULL) {
		size_t length = 0;
		mbuf_t data = NULL;

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		if (tx_ph == 0) {
			continue;
		}

		// Locate the payload: buflet base + data offset
		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);
		uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
		VERIFY(tx_baddr != 0);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		// Clamp to the configured slot size
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		if (length > 0) {
			// Copy the payload into an mbuf for the BSD stack
			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					// Mark packet from policy
					uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
					necp_mark_packet_from_ip(data, policy_id);

					// Check policy with NECP
					if (!ipsec_netif_check_policy(data)) {
						printf("ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
						STATS_INC(nifs, NETIF_STATS_DROPPED);
						mbuf_freem(data);
						data = NULL;
					} else {
						// Send through encryption
						error = ipsec_output(pcb->ipsec_ifp, data);
						if (error != 0) {
							printf("ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
						}
					}
				} else {
					printf("ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROPPED);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				printf("ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROPPED);
			}
		} else {
			printf("ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROPPED);
		}

		if (data == NULL) {
			// Copy or policy check failed: stop draining this sync
			printf("ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
			break;
		}

		STATS_INC(nifs, NETIF_STATS_TXPKTS);
		STATS_INC(nifs, NETIF_STATS_TXCOPY_MBUF);

		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;
	}

	// Publish TX progress, stats, and reclaim consumed slots
	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	return 0;
}
1059
1060 static errno_t
1061 ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1062 kern_channel_ring_t ring, __unused uint32_t flags)
1063 {
1064 #pragma unused(nxprov)
1065 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1066 boolean_t more = false;
1067 errno_t rc = 0;
1068
1069 /*
1070 * Refill and sync the ring; we may be racing against another thread doing
1071 * an RX sync that also wants to do kr_enter(), and so use the blocking
1072 * variant here.
1073 */
1074 rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
1075 if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
1076 printf("%s, tx refill failed %d\n", __func__, rc);
1077 }
1078
1079 (void) kr_enter(ring, TRUE);
1080 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1081
1082 if (pcb->ipsec_kpipe_enabled) {
1083 uint32_t tx_available = kern_channel_available_slot_count(ring);
1084 if (pcb->ipsec_netif_txring_size > 0 &&
1085 tx_available >= pcb->ipsec_netif_txring_size - 1) {
1086 // No room left in tx ring, disable output for now
1087 errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
1088 if (error != 0) {
1089 printf("ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
1090 }
1091 }
1092 }
1093
1094 if (pcb->ipsec_kpipe_enabled) {
1095 kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring;
1096
1097 // Unlock while calling notify
1098 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1099 // Signal the kernel pipe ring to read
1100 if (rx_ring != NULL) {
1101 kern_channel_notify(rx_ring, 0);
1102 }
1103 } else {
1104 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1105 }
1106
1107 kr_exit(ring);
1108
1109 return (0);
1110 }
1111
1112 static errno_t
1113 ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1114 kern_channel_ring_t rx_ring, uint32_t flags)
1115 {
1116 #pragma unused(nxprov)
1117 #pragma unused(flags)
1118 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1119 struct kern_channel_ring_stat_increment rx_ring_stats;
1120
1121 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1122
1123 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1124
1125 // Reclaim user-released slots
1126 (void) kern_channel_reclaim(rx_ring);
1127
1128 STATS_INC(nifs, NETIF_STATS_RXSYNC);
1129
1130 uint32_t avail = kern_channel_available_slot_count(rx_ring);
1131 if (avail == 0) {
1132 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1133 return 0;
1134 }
1135
1136 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
1137 VERIFY(rx_pp != NULL);
1138 bzero(&rx_ring_stats, sizeof(rx_ring_stats));
1139 kern_channel_slot_t rx_pslot = NULL;
1140 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1141
1142 while (rx_slot != NULL) {
1143 // Check for a waiting packet
1144 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1145 mbuf_t data = pcb->ipsec_input_chain;
1146 if (data == NULL) {
1147 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1148 break;
1149 }
1150
1151 // Allocate rx packet
1152 kern_packet_t rx_ph = 0;
1153 errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1154 if (unlikely(error != 0)) {
1155 STATS_INC(nifs, NETIF_STATS_NOMEM_PKT);
1156 STATS_INC(nifs, NETIF_STATS_DROPPED);
1157 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1158 break;
1159 }
1160
1161 // Advance waiting packets
1162 pcb->ipsec_input_chain = data->m_nextpkt;
1163 data->m_nextpkt = NULL;
1164 if (pcb->ipsec_input_chain == NULL) {
1165 pcb->ipsec_input_chain_last = NULL;
1166 }
1167 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1168
1169 size_t length = mbuf_pkthdr_len(data);
1170
1171 if (length < sizeof(struct ip)) {
1172 // Flush data
1173 mbuf_freem(data);
1174 kern_pbufpool_free(rx_pp, rx_ph);
1175 STATS_INC(nifs, NETIF_STATS_BADLEN);
1176 STATS_INC(nifs, NETIF_STATS_DROPPED);
1177 printf("ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
1178 pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
1179 continue;
1180 }
1181
1182 uint32_t af = 0;
1183 struct ip *ip = mtod(data, struct ip *);
1184 u_int ip_version = ip->ip_v;
1185 switch (ip_version) {
1186 case 4: {
1187 af = AF_INET;
1188 break;
1189 }
1190 case 6: {
1191 af = AF_INET6;
1192 break;
1193 }
1194 default: {
1195 printf("ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
1196 pcb->ipsec_ifp->if_xname, ip_version);
1197 break;
1198 }
1199 }
1200
1201 if (length > rx_pp->pp_buflet_size ||
1202 (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
1203
1204 // We need to fragment to send up into the netif
1205
1206 u_int32_t fragment_mtu = rx_pp->pp_buflet_size;
1207 if (pcb->ipsec_frag_size_set &&
1208 pcb->ipsec_input_frag_size < rx_pp->pp_buflet_size) {
1209 fragment_mtu = pcb->ipsec_input_frag_size;
1210 }
1211
1212 mbuf_t fragment_chain = NULL;
1213 switch (af) {
1214 case AF_INET: {
1215 // ip_fragment expects the length in host order
1216 ip->ip_len = ntohs(ip->ip_len);
1217
1218 // ip_fragment will modify the original data, don't free
1219 int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
1220 if (fragment_error == 0 && data != NULL) {
1221 fragment_chain = data;
1222 } else {
1223 STATS_INC(nifs, NETIF_STATS_BADLEN);
1224 STATS_INC(nifs, NETIF_STATS_DROPPED);
1225 printf("ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
1226 pcb->ipsec_ifp->if_xname, length, fragment_error);
1227 }
1228 break;
1229 }
1230 case AF_INET6: {
1231 if (length < sizeof(struct ip6_hdr)) {
1232 mbuf_freem(data);
1233 STATS_INC(nifs, NETIF_STATS_BADLEN);
1234 STATS_INC(nifs, NETIF_STATS_DROPPED);
1235 printf("ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
1236 pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
1237 } else {
1238
1239 // ip6_do_fragmentation will free the original data on success only
1240 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1241 struct ip6_exthdrs exthdrs;
1242 memset(&exthdrs, 0, sizeof(exthdrs));
1243
1244 int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
1245 ip6, &exthdrs, fragment_mtu, ip6->ip6_nxt);
1246 if (fragment_error == 0 && data != NULL) {
1247 fragment_chain = data;
1248 } else {
1249 mbuf_freem(data);
1250 STATS_INC(nifs, NETIF_STATS_BADLEN);
1251 STATS_INC(nifs, NETIF_STATS_DROPPED);
1252 printf("ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
1253 pcb->ipsec_ifp->if_xname, length, fragment_error);
1254 }
1255 }
1256 break;
1257 }
1258 default: {
1259 // Cannot fragment unknown families
1260 mbuf_freem(data);
1261 STATS_INC(nifs, NETIF_STATS_BADLEN);
1262 STATS_INC(nifs, NETIF_STATS_DROPPED);
1263 printf("ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
1264 pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
1265 break;
1266 }
1267 }
1268
1269 if (fragment_chain != NULL) {
1270 // Add fragments to chain before continuing
1271 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1272 if (pcb->ipsec_input_chain != NULL) {
1273 pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
1274 } else {
1275 pcb->ipsec_input_chain = fragment_chain;
1276 }
1277 while (fragment_chain->m_nextpkt) {
1278 VERIFY(fragment_chain != fragment_chain->m_nextpkt);
1279 fragment_chain = fragment_chain->m_nextpkt;
1280 }
1281 pcb->ipsec_input_chain_last = fragment_chain;
1282 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1283 }
1284
1285 // Make sure to free unused rx packet
1286 kern_pbufpool_free(rx_pp, rx_ph);
1287
1288 continue;
1289 }
1290
1291 mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1292
1293 // Fillout rx packet
1294 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1295 VERIFY(rx_buf != NULL);
1296 void *rx_baddr = kern_buflet_get_object_address(rx_buf);
1297 VERIFY(rx_baddr != NULL);
1298
1299 // Copy-in data from mbuf to buflet
1300 mbuf_copydata(data, 0, length, (void *)rx_baddr);
1301 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
1302
1303 // Finalize and attach the packet
1304 error = kern_buflet_set_data_offset(rx_buf, 0);
1305 VERIFY(error == 0);
1306 error = kern_buflet_set_data_length(rx_buf, length);
1307 VERIFY(error == 0);
1308 error = kern_packet_set_link_header_offset(rx_ph, 0);
1309 VERIFY(error == 0);
1310 error = kern_packet_set_network_header_offset(rx_ph, 0);
1311 VERIFY(error == 0);
1312 error = kern_packet_finalize(rx_ph);
1313 VERIFY(error == 0);
1314 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1315 VERIFY(error == 0);
1316
1317 STATS_INC(nifs, NETIF_STATS_RXPKTS);
1318 STATS_INC(nifs, NETIF_STATS_RXCOPY_MBUF);
1319 bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1320
1321 rx_ring_stats.kcrsi_slots_transferred++;
1322 rx_ring_stats.kcrsi_bytes_transferred += length;
1323
1324 if (!pcb->ipsec_ext_ifdata_stats) {
1325 ifnet_stat_increment_in(pcb->ipsec_ifp, 1, length, 0);
1326 }
1327
1328 mbuf_freem(data);
1329
1330 // Advance ring
1331 rx_pslot = rx_slot;
1332 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1333 }
1334
1335 struct kern_channel_ring_stat_increment tx_ring_stats;
1336 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
1337 kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring;
1338 kern_channel_slot_t tx_pslot = NULL;
1339 kern_channel_slot_t tx_slot = NULL;
1340 if (tx_ring == NULL) {
1341 // Net-If TX ring not set up yet, nothing to read
1342 goto done;
1343 }
1344
1345
1346 // Unlock ipsec before entering ring
1347 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1348
1349 (void)kr_enter(tx_ring, TRUE);
1350
1351 // Lock again after entering and validate
1352 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1353
1354 if (tx_ring != pcb->ipsec_kpipe_txring) {
1355 goto done;
1356 }
1357
1358 tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
1359 if (tx_slot == NULL) {
1360 // Nothing to read, don't bother signalling
1361 goto done;
1362 }
1363
1364 while (rx_slot != NULL && tx_slot != NULL) {
1365 size_t length = 0;
1366 mbuf_t data = NULL;
1367 errno_t error = 0;
1368 uint32_t af;
1369
1370 // Allocate rx packet
1371 kern_packet_t rx_ph = 0;
1372 error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1373 if (unlikely(error != 0)) {
1374 STATS_INC(nifs, NETIF_STATS_NOMEM_PKT);
1375 STATS_INC(nifs, NETIF_STATS_DROPPED);
1376 break;
1377 }
1378
1379 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
1380
1381 // Advance TX ring
1382 tx_pslot = tx_slot;
1383 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1384
1385 if (tx_ph == 0) {
1386 continue;
1387 }
1388
1389 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1390 VERIFY(tx_buf != NULL);
1391 uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
1392 VERIFY(tx_baddr != 0);
1393 tx_baddr += kern_buflet_get_data_offset(tx_buf);
1394
1395 length = MIN(kern_packet_get_data_length(tx_ph),
1396 pcb->ipsec_slot_size);
1397
1398 // Increment TX stats
1399 tx_ring_stats.kcrsi_slots_transferred++;
1400 tx_ring_stats.kcrsi_bytes_transferred += length;
1401
1402 if (length >= sizeof(struct ip)) {
1403 error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
1404 if (error == 0) {
1405 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
1406 if (error == 0) {
1407 struct ip *ip = mtod(data, struct ip *);
1408 u_int ip_version = ip->ip_v;
1409 switch (ip_version) {
1410 case 4: {
1411 af = AF_INET;
1412 ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip);
1413 ip->ip_off = ntohs(ip->ip_off);
1414
1415 if (length < ip->ip_len) {
1416 printf("ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
1417 pcb->ipsec_ifp->if_xname, length, ip->ip_len);
1418 STATS_INC(nifs, NETIF_STATS_BADLEN);
1419 STATS_INC(nifs, NETIF_STATS_DROPPED);
1420 mbuf_freem(data);
1421 data = NULL;
1422 } else {
1423 data = esp4_input_extended(data, sizeof(struct ip), pcb->ipsec_ifp);
1424 }
1425 break;
1426 }
1427 case 6: {
1428 if (length < sizeof(struct ip6_hdr)) {
1429 printf("ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
1430 pcb->ipsec_ifp->if_xname, length);
1431 STATS_INC(nifs, NETIF_STATS_BADLEN);
1432 STATS_INC(nifs, NETIF_STATS_DROPPED);
1433 mbuf_freem(data);
1434 data = NULL;
1435 } else {
1436 af = AF_INET6;
1437 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1438 const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
1439 if (length < ip6_len) {
1440 printf("ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
1441 pcb->ipsec_ifp->if_xname, length, ip6_len);
1442 STATS_INC(nifs, NETIF_STATS_BADLEN);
1443 STATS_INC(nifs, NETIF_STATS_DROPPED);
1444 mbuf_freem(data);
1445 data = NULL;
1446 } else {
1447 int offset = sizeof(struct ip6_hdr);
1448 esp6_input_extended(&data, &offset, ip6->ip6_nxt, pcb->ipsec_ifp);
1449 }
1450 }
1451 break;
1452 }
1453 default: {
1454 printf("ipsec_netif_sync_rx %s: unknown ip version %u\n",
1455 pcb->ipsec_ifp->if_xname, ip_version);
1456 STATS_INC(nifs, NETIF_STATS_DROPPED);
1457 mbuf_freem(data);
1458 data = NULL;
1459 break;
1460 }
1461 }
1462 } else {
1463 printf("ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
1464 STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
1465 STATS_INC(nifs, NETIF_STATS_DROPPED);
1466 mbuf_freem(data);
1467 data = NULL;
1468 }
1469 } else {
1470 printf("ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
1471 STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
1472 STATS_INC(nifs, NETIF_STATS_DROPPED);
1473 }
1474 } else {
1475 printf("ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb->ipsec_ifp->if_xname, length);
1476 STATS_INC(nifs, NETIF_STATS_BADLEN);
1477 STATS_INC(nifs, NETIF_STATS_DROPPED);
1478 }
1479
1480 if (data == NULL) {
1481 // Failed to get decrypted data data
1482 kern_pbufpool_free(rx_pp, rx_ph);
1483 continue;
1484 }
1485
1486 length = mbuf_pkthdr_len(data);
1487 if (length > rx_pp->pp_buflet_size) {
1488 // Flush data
1489 mbuf_freem(data);
1490 kern_pbufpool_free(rx_pp, rx_ph);
1491 STATS_INC(nifs, NETIF_STATS_BADLEN);
1492 STATS_INC(nifs, NETIF_STATS_DROPPED);
1493 printf("ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
1494 pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
1495 continue;
1496 }
1497
1498 mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1499
1500 // Fillout rx packet
1501 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1502 VERIFY(rx_buf != NULL);
1503 void *rx_baddr = kern_buflet_get_object_address(rx_buf);
1504 VERIFY(rx_baddr != NULL);
1505
1506 // Copy-in data from mbuf to buflet
1507 mbuf_copydata(data, 0, length, (void *)rx_baddr);
1508 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
1509
1510 // Finalize and attach the packet
1511 error = kern_buflet_set_data_offset(rx_buf, 0);
1512 VERIFY(error == 0);
1513 error = kern_buflet_set_data_length(rx_buf, length);
1514 VERIFY(error == 0);
1515 error = kern_packet_set_link_header_offset(rx_ph, 0);
1516 VERIFY(error == 0);
1517 error = kern_packet_set_network_header_offset(rx_ph, 0);
1518 VERIFY(error == 0);
1519 error = kern_packet_finalize(rx_ph);
1520 VERIFY(error == 0);
1521 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1522 VERIFY(error == 0);
1523
1524 STATS_INC(nifs, NETIF_STATS_RXPKTS);
1525 STATS_INC(nifs, NETIF_STATS_RXCOPY_DIRECT);
1526 bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1527
1528 rx_ring_stats.kcrsi_slots_transferred++;
1529 rx_ring_stats.kcrsi_bytes_transferred += length;
1530
1531 if (!pcb->ipsec_ext_ifdata_stats) {
1532 ifnet_stat_increment_in(pcb->ipsec_ifp, 1, length, 0);
1533 }
1534
1535 mbuf_freem(data);
1536
1537 rx_pslot = rx_slot;
1538 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1539 }
1540
1541 done:
1542 if (rx_pslot) {
1543 kern_channel_advance_slot(rx_ring, rx_pslot);
1544 kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
1545 }
1546
1547 if (tx_pslot) {
1548 kern_channel_advance_slot(tx_ring, tx_pslot);
1549 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1550 (void)kern_channel_reclaim(tx_ring);
1551 }
1552
1553 // Unlock first, then exit ring
1554 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1555 if (tx_ring != NULL) {
1556 if (tx_pslot != NULL) {
1557 kern_channel_notify(tx_ring, 0);
1558 }
1559 kr_exit(tx_ring);
1560 }
1561
1562 return 0;
1563 }
1564
/*
 * ipsec_nexus_ifattach - register a per-unit netif nexus provider
 * ("com.apple.netif.ipsec%d") with the shared controller and allocate a net
 * provider instance, which creates and returns the backing ifnet in *ifp.
 *
 * Slot buffer size and TX/RX ring sizes come from the pcb tunables; the TX
 * ring size is cached in pcb->ipsec_netif_txring_size for the doorbell's
 * flow-control check.
 *
 * Returns 0 on success or the errno of the failing step.  On instance
 * allocation failure the provider registration is rolled back; the nexus
 * attribute object is destroyed on all paths (success falls through to the
 * "failed" label intentionally).
 */
static errno_t
ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
					 struct ifnet_init_eparams *init_params,
					 struct ifnet **ifp)
{
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;

	// Provider name is unique per ipsec unit
	nexus_name_t provider_name;
	snprintf((char *)provider_name, sizeof(provider_name),
			 "com.apple.netif.ipsec%d", pcb->ipsec_unit);

	// Callbacks wiring this netif nexus to the ipsec sync/doorbell handlers
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_netif_ring_init,
		.nxpi_ring_fini = ipsec_netif_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_netif_sync_tx,
		.nxpi_sync_rx = ipsec_netif_sync_rx,
		.nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
	};

	nexus_attr_t nxa = NULL;
	err = kern_nexus_attr_create(&nxa);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		printf("%s: kern_nexus_attr_create failed: %d\n",
			   __func__, err);
		goto failed;
	}

	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for netif nexus to limit memory usage
	uint64_t ring_size = pcb->ipsec_netif_ring_size;
	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(err == 0);
	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(err == 0);

	// Remember the TX ring size for doorbell flow control
	pcb->ipsec_netif_txring_size = ring_size;

	err = kern_nexus_controller_register_provider(controller,
												  ipsec_nx_dom_prov,
												  provider_name,
												  &prov_init,
												  sizeof(prov_init),
												  nxa,
												  &pcb->ipsec_nx.if_provider);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		printf("%s register provider failed, error %d\n",
			   __func__, err);
		goto failed;
	}

	// Allocate the net provider instance; this creates the ifnet (*ifp)
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = ipsec_netif_prepare;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
															pcb->ipsec_nx.if_provider,
															pcb,
															&pcb->ipsec_nx.if_instance,
															&net_init,
															ifp);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		printf("%s alloc_net_provider_instance failed, %d\n",
			   __func__, err);
		// Roll back the provider registration on instance failure
		kern_nexus_controller_deregister_provider(controller,
												  pcb->ipsec_nx.if_provider);
		uuid_clear(pcb->ipsec_nx.if_provider);
		goto failed;
	}

failed:
	// Reached on both success and failure; nxa is no longer needed
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}
	return (err);
}
1658
1659 static void
1660 ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
1661 {
1662 nexus_controller_t controller = kern_nexus_shared_controller();
1663 errno_t err;
1664
1665 if (!uuid_is_null(instance)) {
1666 err = kern_nexus_controller_free_provider_instance(controller,
1667 instance);
1668 if (err != 0) {
1669 printf("%s free_provider_instance failed %d\n",
1670 __func__, err);
1671 }
1672 uuid_clear(instance);
1673 }
1674 if (!uuid_is_null(provider)) {
1675 err = kern_nexus_controller_deregister_provider(controller,
1676 provider);
1677 if (err != 0) {
1678 printf("%s deregister_provider %d\n", __func__, err);
1679 }
1680 uuid_clear(provider);
1681 }
1682 return;
1683 }
1684
1685 static void
1686 ipsec_nexus_detach(ipsec_nx_t nx)
1687 {
1688 nexus_controller_t controller = kern_nexus_shared_controller();
1689 errno_t err;
1690
1691 if (!uuid_is_null(nx->ms_host)) {
1692 err = kern_nexus_ifdetach(controller,
1693 nx->ms_instance,
1694 nx->ms_host);
1695 if (err != 0) {
1696 printf("%s: kern_nexus_ifdetach ms host failed %d\n",
1697 __func__, err);
1698 }
1699 }
1700
1701 if (!uuid_is_null(nx->ms_device)) {
1702 err = kern_nexus_ifdetach(controller,
1703 nx->ms_instance,
1704 nx->ms_device);
1705 if (err != 0) {
1706 printf("%s: kern_nexus_ifdetach ms device failed %d\n",
1707 __func__, err);
1708 }
1709 }
1710
1711 ipsec_detach_provider_and_instance(nx->if_provider,
1712 nx->if_instance);
1713 ipsec_detach_provider_and_instance(nx->ms_provider,
1714 nx->ms_instance);
1715
1716 memset(nx, 0, sizeof(*nx));
1717 }
1718
/*
 * ipsec_create_fs_provider_and_instance - register a flowswitch nexus
 * provider ("com.apple.<type_name>.<ifname>") of the given subtype and
 * allocate one instance of it.
 *
 * Slot buffer size and TX/RX ring sizes come from the pcb tunables.  On
 * success *provider and *instance receive the new UUIDs.  On instance
 * allocation failure the provider registration is rolled back and
 * *provider is cleared.  Returns 0 or the errno of the failing step.
 */
static errno_t
ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
									  uint32_t subtype, const char *type_name,
									  const char *ifname,
									  uuid_t *provider, uuid_t *instance)
{
	nexus_attr_t attr = NULL;
	nexus_controller_t controller = kern_nexus_shared_controller();
	uuid_t dom_prov;
	errno_t err;
	struct kern_nexus_init init;
	nexus_name_t provider_name;

	err = kern_nexus_get_builtin_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
												 &dom_prov);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		printf("%s can't get %s provider, error %d\n",
			   __func__, type_name, err);
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		printf("%s: kern_nexus_attr_create failed: %d\n",
			   __func__, err);
		goto failed;
	}

	// Flowswitch subtype (e.g. multistack) is carried in the extensions attr
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_EXTENSIONS, subtype);
	VERIFY(err == 0);

	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
	uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
	VERIFY(err == 0);
	uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
			 "com.apple.%s.%s", type_name, ifname);
	err = kern_nexus_controller_register_provider(controller,
												  dom_prov,
												  provider_name,
												  NULL,
												  0,
												  attr,
												  provider);
	// Attributes are copied by registration; destroy our copy now
	kern_nexus_attr_destroy(attr);
	attr = NULL;
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		printf("%s register %s provider failed, error %d\n",
			   __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof (init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
														*provider,
														NULL,
														instance, &init);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		printf("%s alloc_provider_instance %s failed, %d\n",
			   __func__, type_name, err);
		// Roll back the registration so *provider doesn't dangle
		kern_nexus_controller_deregister_provider(controller,
												  *provider);
		uuid_clear(*provider);
	}
failed:
	return (err);
}
1798
1799 static errno_t
1800 ipsec_multistack_attach(struct ipsec_pcb *pcb)
1801 {
1802 nexus_controller_t controller = kern_nexus_shared_controller();
1803 errno_t err = 0;
1804 ipsec_nx_t nx = &pcb->ipsec_nx;
1805
1806 // Allocate multistack flowswitch
1807 err = ipsec_create_fs_provider_and_instance(pcb,
1808 NEXUS_EXTENSION_FSW_TYPE_MULTISTACK,
1809 "multistack",
1810 pcb->ipsec_ifp->if_xname,
1811 &nx->ms_provider,
1812 &nx->ms_instance);
1813 if (err != 0) {
1814 printf("%s: failed to create bridge provider and instance\n",
1815 __func__);
1816 goto failed;
1817 }
1818
1819 // Attach multistack to device port
1820 err = kern_nexus_ifattach(controller, nx->ms_instance,
1821 NULL, nx->if_instance,
1822 FALSE, &nx->ms_device);
1823 if (err != 0) {
1824 printf("%s kern_nexus_ifattach ms device %d\n", __func__, err);
1825 goto failed;
1826 }
1827
1828 // Attach multistack to host port
1829 err = kern_nexus_ifattach(controller, nx->ms_instance,
1830 NULL, nx->if_instance,
1831 TRUE, &nx->ms_host);
1832 if (err != 0) {
1833 printf("%s kern_nexus_ifattach ms host %d\n", __func__, err);
1834 goto failed;
1835 }
1836
1837 // Extract the agent UUID and save for later
1838 struct kern_nexus *multistack_nx = nx_find(nx->ms_instance, false);
1839 if (multistack_nx != NULL) {
1840 struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(multistack_nx);
1841 if (flowswitch != NULL) {
1842 FSW_RLOCK(flowswitch);
1843 struct fsw_ms_context *ms_context = (struct fsw_ms_context *)flowswitch->fsw_ops_private;
1844 if (ms_context != NULL) {
1845 uuid_copy(nx->ms_agent, ms_context->mc_agent_uuid);
1846 } else {
1847 printf("ipsec_multistack_attach - fsw_ms_context is NULL\n");
1848 }
1849 FSW_UNLOCK(flowswitch);
1850 } else {
1851 printf("ipsec_multistack_attach - flowswitch is NULL\n");
1852 }
1853 nx_release(multistack_nx);
1854 } else {
1855 printf("ipsec_multistack_attach - unable to find multistack nexus\n");
1856 }
1857
1858 return (0);
1859
1860 failed:
1861 ipsec_nexus_detach(nx);
1862
1863 errno_t detach_error = 0;
1864 if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
1865 panic("ipsec_multistack_attach - ifnet_detach failed: %d\n", detach_error);
1866 /* NOT REACHED */
1867 }
1868
1869 return (err);
1870 }
1871
1872 #pragma mark Kernel Pipe Nexus
1873
/*
 * ipsec_register_kernel_pipe_nexus - lazily create the shared kernel-pipe
 * nexus controller and register the ipsec kpipe provider.
 *
 * Reference counted under ipsec_lock: the first caller creates ipsec_ncd and
 * registers the "com.apple.nexus.ipsec.kpipe" provider; subsequent callers
 * only bump ipsec_ncd_refcount.  On failure all partially created state is
 * destroyed and the refcount is reset to zero.  Pair each successful call
 * with ipsec_unregister_kernel_pipe_nexus().
 */
static errno_t
ipsec_register_kernel_pipe_nexus(void)
{
	nexus_attr_t nxa = NULL;
	errno_t result;

	lck_mtx_lock(&ipsec_lock);
	if (ipsec_ncd_refcount++) {
		// Already registered; just take another reference
		lck_mtx_unlock(&ipsec_lock);
		return 0;
	}

	result = kern_nexus_controller_create(&ipsec_ncd);
	if (result) {
		printf("%s: kern_nexus_controller_create failed: %d\n",
			   __FUNCTION__, result);
		goto done;
	}

	uuid_t dom_prov;
	result = kern_nexus_get_builtin_domain_provider(
		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
	if (result) {
		printf("%s: kern_nexus_get_builtin_domain_provider failed: %d\n",
			   __FUNCTION__, result);
		goto done;
	}

	// Callbacks for the kpipe side (no TX doorbell on this path)
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_kpipe_ring_init,
		.nxpi_ring_fini = ipsec_kpipe_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_kpipe_sync_tx,
		.nxpi_sync_rx = ipsec_kpipe_sync_rx,
		.nxpi_tx_doorbell = NULL,
	};

	result = kern_nexus_attr_create(&nxa);
	if (result) {
		printf("%s: kern_nexus_attr_create failed: %d\n",
			   __FUNCTION__, result);
		goto done;
	}

	uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(result == 0);

	// Reset ring size for kernel pipe nexus to limit memory usage
	uint64_t ring_size = if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(result == 0);
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(result == 0);

	result = kern_nexus_controller_register_provider(ipsec_ncd,
													 dom_prov,
													 (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
													 &prov_init,
													 sizeof(prov_init),
													 nxa,
													 &ipsec_kpipe_uuid);
	if (result) {
		printf("%s: kern_nexus_controller_register_provider failed: %d\n",
			   __FUNCTION__, result);
		goto done;
	}

done:
	// Attribute object no longer needed whether we succeeded or not
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}

	// On failure, tear down the controller and drop the reference we took
	if (result) {
		if (ipsec_ncd) {
			kern_nexus_controller_destroy(ipsec_ncd);
			ipsec_ncd = NULL;
		}
		ipsec_ncd_refcount = 0;
	}

	lck_mtx_unlock(&ipsec_lock);

	return result;
}
1966
1967 static void
1968 ipsec_unregister_kernel_pipe_nexus(void)
1969 {
1970 lck_mtx_lock(&ipsec_lock);
1971
1972 VERIFY(ipsec_ncd_refcount > 0);
1973
1974 if (--ipsec_ncd_refcount == 0) {
1975 kern_nexus_controller_destroy(ipsec_ncd);
1976 ipsec_ncd = NULL;
1977 }
1978
1979 lck_mtx_unlock(&ipsec_lock);
1980 }
1981
1982 // For use by socket option, not internally
1983 static errno_t
1984 ipsec_disable_channel(struct ipsec_pcb *pcb)
1985 {
1986 errno_t result;
1987 int enabled;
1988 uuid_t uuid;
1989
1990 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
1991
1992 enabled = pcb->ipsec_kpipe_enabled;
1993 uuid_copy(uuid, pcb->ipsec_kpipe_uuid);
1994
1995 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid) == !enabled);
1996
1997 pcb->ipsec_kpipe_enabled = 0;
1998 uuid_clear(pcb->ipsec_kpipe_uuid);
1999
2000 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2001
2002 if (enabled) {
2003 result = kern_nexus_controller_free_provider_instance(ipsec_ncd, uuid);
2004 } else {
2005 result = ENXIO;
2006 }
2007
2008 if (!result) {
2009 ipsec_unregister_kernel_pipe_nexus();
2010 }
2011
2012 return result;
2013 }
2014
/*
 * ipsec_enable_channel - create a kernel-pipe nexus instance for this pcb
 * and bind its client port to the calling process.
 *
 * Takes a reference on the shared kpipe controller, allocates a provider
 * instance (UUID stored in pcb->ipsec_kpipe_uuid), and binds the client
 * port to proc's pid.  Returns EEXIST if a channel is already enabled.  On
 * any failure all partially created state is rolled back, including the
 * controller reference.
 */
static errno_t
ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
{
	struct kern_nexus_init init;
	errno_t result;

	// Take a reference on the shared kpipe nexus controller/provider
	result = ipsec_register_kernel_pipe_nexus();
	if (result) {
		return result;
	}

	VERIFY(ipsec_ncd);

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

	if (pcb->ipsec_kpipe_enabled) {
		result = EEXIST; // return success instead?
		goto done;
	}

	VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid));
	bzero(&init, sizeof (init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
		ipsec_kpipe_uuid, pcb, &pcb->ipsec_kpipe_uuid, &init);
	if (result) {
		goto done;
	}

	// Restrict the client port to the calling process's pid
	nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
	result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
		pcb->ipsec_kpipe_uuid, &port,
		proc_pid(proc), NULL, NULL, 0, NEXUS_BIND_PID);
	if (result) {
		// Bind failed: roll back the instance allocation
		kern_nexus_controller_free_provider_instance(ipsec_ncd,
			pcb->ipsec_kpipe_uuid);
		uuid_clear(pcb->ipsec_kpipe_uuid);
		goto done;
	}

	pcb->ipsec_kpipe_enabled = 1;

done:
	lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

	// On any failure, drop the controller reference taken above
	if (result) {
		ipsec_unregister_kernel_pipe_nexus();
	}

	return result;
}
2066
2067 #endif // IPSEC_NEXUS
2068
2069
2070 /* Kernel control functions */
2071
2072 static inline void
2073 ipsec_free_pcb(struct ipsec_pcb *pcb, bool in_list)
2074 {
2075 #if IPSEC_NEXUS
2076 mbuf_freem_list(pcb->ipsec_input_chain);
2077 lck_mtx_destroy(&pcb->ipsec_input_chain_lock, ipsec_lck_grp);
2078 #endif // IPSEC_NEXUS
2079 lck_rw_destroy(&pcb->ipsec_pcb_lock, ipsec_lck_grp);
2080 if (in_list) {
2081 lck_mtx_lock(&ipsec_lock);
2082 TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
2083 lck_mtx_unlock(&ipsec_lock);
2084 }
2085 zfree(ipsec_pcb_zone, pcb);
2086 }
2087
2088 static errno_t
2089 ipsec_ctl_bind(kern_ctl_ref kctlref,
2090 struct sockaddr_ctl *sac,
2091 void **unitinfo)
2092 {
2093 struct ipsec_pcb *pcb = zalloc(ipsec_pcb_zone);
2094 memset(pcb, 0, sizeof(*pcb));
2095
2096 /* Setup the protocol control block */
2097 *unitinfo = pcb;
2098 pcb->ipsec_ctlref = kctlref;
2099 pcb->ipsec_unit = sac->sc_unit;
2100 pcb->ipsec_output_service_class = MBUF_SC_OAM;
2101
2102 #if IPSEC_NEXUS
2103 pcb->ipsec_use_netif = false;
2104 pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
2105 pcb->ipsec_netif_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
2106 pcb->ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
2107 pcb->ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
2108 #endif // IPSEC_NEXUS
2109
2110 lck_rw_init(&pcb->ipsec_pcb_lock, ipsec_lck_grp, ipsec_lck_attr);
2111 #if IPSEC_NEXUS
2112 lck_mtx_init(&pcb->ipsec_input_chain_lock, ipsec_lck_grp, ipsec_lck_attr);
2113 #endif // IPSEC_NEXUS
2114
2115 return (0);
2116 }
2117
/*
 * Kernel control connect handler: ensure a PCB exists (binding lazily if
 * needed), pick a unique interface id, create and attach the ipsec
 * network interface, and mark it running.
 *
 * Returns 0 on success or the ifnet/nexus attach error; on most failure
 * paths the PCB is freed and *unitinfo cleared.
 */
static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	struct ifnet_init_eparams ipsec_init = {};
	errno_t result = 0;

	/*
	 * Allocate a PCB if the socket was not explicitly bound first.
	 * Result deliberately ignored: ipsec_ctl_bind always returns 0.
	 */
	if (*unitinfo == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	struct ipsec_pcb *pcb = *unitinfo;

	lck_mtx_lock(&ipsec_lock);

	/* Find some open interface id */
	u_int32_t chosen_unique_id = 1;
	struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
	if (next_pcb != NULL) {
		/* List was not empty, add one to the last item */
		chosen_unique_id = next_pcb->ipsec_unique_id + 1;
		next_pcb = NULL;

		/*
		 * If this wrapped the id number, start looking at
		 * the front of the list for an unused id.
		 */
		if (chosen_unique_id == 0) {
			/* Find the next unused ID */
			chosen_unique_id = 1;
			TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
				if (next_pcb->ipsec_unique_id > chosen_unique_id) {
					/* We found a gap */
					break;
				}

				chosen_unique_id = next_pcb->ipsec_unique_id + 1;
			}
		}
	}

	pcb->ipsec_unique_id = chosen_unique_id;

	/*
	 * Insert before the gap element found above (keeps the list sorted
	 * by unique id), or at the tail if no gap search was needed.
	 */
	if (next_pcb != NULL) {
		TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
	} else {
		TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
	}
	lck_mtx_unlock(&ipsec_lock);

	/* Names are 1-based unit/id mapped to 0-based interface names. */
	snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
	snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
	printf("ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);

	/* Create the interface */
	bzero(&ipsec_init, sizeof(ipsec_init));
	ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
	ipsec_init.len = sizeof (ipsec_init);

#if IPSEC_NEXUS
	if (pcb->ipsec_use_netif) {
		/* Skywalk-native: no legacy start callback. */
		ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
	} else
#endif // IPSEC_NEXUS
	{
		ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
		ipsec_init.start = ipsec_start;
	}
	ipsec_init.name = "ipsec";
	ipsec_init.unit = pcb->ipsec_unit - 1;
	ipsec_init.uniqueid = pcb->ipsec_unique_name;
	ipsec_init.uniqueid_len = strlen(pcb->ipsec_unique_name);
	ipsec_init.family = ipsec_family;
	ipsec_init.subfamily = IFNET_SUBFAMILY_IPSEC;
	ipsec_init.type = IFT_OTHER;
	ipsec_init.demux = ipsec_demux;
	ipsec_init.add_proto = ipsec_add_proto;
	ipsec_init.del_proto = ipsec_del_proto;
	ipsec_init.softc = pcb;
	ipsec_init.ioctl = ipsec_ioctl;
	ipsec_init.detach = ipsec_detached;

#if IPSEC_NEXUS
	if (pcb->ipsec_use_netif) {
		result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			printf("ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
			ipsec_free_pcb(pcb, true);
			*unitinfo = NULL;
			return result;
		}

		result = ipsec_multistack_attach(pcb);
		if (result != 0) {
			printf("ipsec_ctl_connect - ipsec_multistack_attach failed: %d\n", result);
			/*
			 * NOTE(review): unlike the other failure paths, the pcb
			 * is not freed here (the ifnet is already attached at
			 * this point) — confirm teardown happens via detach.
			 */
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
	} else
#endif // IPSEC_NEXUS
	{
		result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			printf("ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
			ipsec_free_pcb(pcb, true);
			*unitinfo = NULL;
			return result;
		}
		ipsec_ifnet_set_attrs(pcb->ipsec_ifp);

		/* Attach the interface */
		result = ifnet_attach(pcb->ipsec_ifp, NULL);
		if (result != 0) {
			printf("ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
			ifnet_release(pcb->ipsec_ifp);
			ipsec_free_pcb(pcb, true);
			*unitinfo = NULL;
			return (result);
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
	}

	/* The interface's resources are allocated; mark it as running */
	ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);

	return (0);
}
2251
2252 static errno_t
2253 ipsec_detach_ip(ifnet_t interface,
2254 protocol_family_t protocol,
2255 socket_t pf_socket)
2256 {
2257 errno_t result = EPROTONOSUPPORT;
2258
2259 /* Attempt a detach */
2260 if (protocol == PF_INET) {
2261 struct ifreq ifr;
2262
2263 bzero(&ifr, sizeof(ifr));
2264 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2265 ifnet_name(interface), ifnet_unit(interface));
2266
2267 result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
2268 }
2269 else if (protocol == PF_INET6) {
2270 struct in6_ifreq ifr6;
2271
2272 bzero(&ifr6, sizeof(ifr6));
2273 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2274 ifnet_name(interface), ifnet_unit(interface));
2275
2276 result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2277 }
2278
2279 return result;
2280 }
2281
2282 static void
2283 ipsec_remove_address(ifnet_t interface,
2284 protocol_family_t protocol,
2285 ifaddr_t address,
2286 socket_t pf_socket)
2287 {
2288 errno_t result = 0;
2289
2290 /* Attempt a detach */
2291 if (protocol == PF_INET) {
2292 struct ifreq ifr;
2293
2294 bzero(&ifr, sizeof(ifr));
2295 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2296 ifnet_name(interface), ifnet_unit(interface));
2297 result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2298 if (result != 0) {
2299 printf("ipsec_remove_address - ifaddr_address failed: %d", result);
2300 }
2301 else {
2302 result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2303 if (result != 0) {
2304 printf("ipsec_remove_address - SIOCDIFADDR failed: %d", result);
2305 }
2306 }
2307 }
2308 else if (protocol == PF_INET6) {
2309 struct in6_ifreq ifr6;
2310
2311 bzero(&ifr6, sizeof(ifr6));
2312 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2313 ifnet_name(interface), ifnet_unit(interface));
2314 result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
2315 sizeof(ifr6.ifr_addr));
2316 if (result != 0) {
2317 printf("ipsec_remove_address - ifaddr_address failed (v6): %d",
2318 result);
2319 }
2320 else {
2321 result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2322 if (result != 0) {
2323 printf("ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
2324 result);
2325 }
2326 }
2327 }
2328 }
2329
2330 static void
2331 ipsec_cleanup_family(ifnet_t interface,
2332 protocol_family_t protocol)
2333 {
2334 errno_t result = 0;
2335 socket_t pf_socket = NULL;
2336 ifaddr_t *addresses = NULL;
2337 int i;
2338
2339 if (protocol != PF_INET && protocol != PF_INET6) {
2340 printf("ipsec_cleanup_family - invalid protocol family %d\n", protocol);
2341 return;
2342 }
2343
2344 /* Create a socket for removing addresses and detaching the protocol */
2345 result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2346 if (result != 0) {
2347 if (result != EAFNOSUPPORT)
2348 printf("ipsec_cleanup_family - failed to create %s socket: %d\n",
2349 protocol == PF_INET ? "IP" : "IPv6", result);
2350 goto cleanup;
2351 }
2352
2353 /* always set SS_PRIV, we want to close and detach regardless */
2354 sock_setpriv(pf_socket, 1);
2355
2356 result = ipsec_detach_ip(interface, protocol, pf_socket);
2357 if (result == 0 || result == ENXIO) {
2358 /* We are done! We either detached or weren't attached. */
2359 goto cleanup;
2360 }
2361 else if (result != EBUSY) {
2362 /* Uh, not really sure what happened here... */
2363 printf("ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2364 goto cleanup;
2365 }
2366
2367 /*
2368 * At this point, we received an EBUSY error. This means there are
2369 * addresses attached. We should detach them and then try again.
2370 */
2371 result = ifnet_get_address_list_family(interface, &addresses, protocol);
2372 if (result != 0) {
2373 printf("fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2374 ifnet_name(interface), ifnet_unit(interface),
2375 protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
2376 goto cleanup;
2377 }
2378
2379 for (i = 0; addresses[i] != 0; i++) {
2380 ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
2381 }
2382 ifnet_free_address_list(addresses);
2383 addresses = NULL;
2384
2385 /*
2386 * The addresses should be gone, we should try the remove again.
2387 */
2388 result = ipsec_detach_ip(interface, protocol, pf_socket);
2389 if (result != 0 && result != ENXIO) {
2390 printf("ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2391 }
2392
2393 cleanup:
2394 if (pf_socket != NULL)
2395 sock_close(pf_socket);
2396
2397 if (addresses != NULL)
2398 ifnet_free_address_list(addresses);
2399 }
2400
/*
 * Kernel control disconnect handler: tear down the kernel pipe channel,
 * remove addresses/protocols, and detach (or free, if never connected)
 * the ipsec interface.  Always returns 0.
 */
static errno_t
ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo)
{
	struct ipsec_pcb *pcb = unitinfo;
	ifnet_t ifp = NULL;
	errno_t result = 0;

	if (pcb == NULL) {
		return EINVAL;
	}

#if IPSEC_NEXUS
	// Tell the nexus to stop all rings
	if (pcb->ipsec_netif_nexus != NULL) {
		kern_nexus_stop(pcb->ipsec_netif_nexus);
	}
#endif // IPSEC_NEXUS

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
	/* Snapshot and clear the kpipe state; the instance is freed below,
	 * after the PCB lock has been dropped. */
	uuid_t kpipe_uuid;
	uuid_copy(kpipe_uuid, pcb->ipsec_kpipe_uuid);
	uuid_clear(pcb->ipsec_kpipe_uuid);
	pcb->ipsec_kpipe_enabled = FALSE;
#endif // IPSEC_NEXUS

	pcb->ipsec_ctlref = NULL;

	ifp = pcb->ipsec_ifp;
	if (ifp != NULL) {
#if IPSEC_NEXUS
		if (pcb->ipsec_netif_nexus != NULL) {
			/*
			 * Quiesce the interface and flush any pending outbound packets.
			 */
			if_down(ifp);

			/* Increment refcnt, but detach interface */
			ifnet_incr_iorefcnt(ifp);
			if ((result = ifnet_detach(ifp)) != 0) {
				panic("ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
				/* NOT REACHED */
			}

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

			/* Free the kernel pipe provider instance, if one was enabled. */
			if (!uuid_is_null(kpipe_uuid)) {
				if (kern_nexus_controller_free_provider_instance(ipsec_ncd, kpipe_uuid) == 0) {
					ipsec_unregister_kernel_pipe_nexus();
				}
			}
			ipsec_nexus_detach(&pcb->ipsec_nx);

			/* Decrement refcnt to finish detaching and freeing */
			ifnet_decr_iorefcnt(ifp);
		} else
#endif // IPSEC_NEXUS
		{
			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
			/* Free the kernel pipe provider instance, if one was enabled. */
			if (!uuid_is_null(kpipe_uuid)) {
				if (kern_nexus_controller_free_provider_instance(ipsec_ncd, kpipe_uuid) == 0) {
					ipsec_unregister_kernel_pipe_nexus();
				}
			}
#endif // IPSEC_NEXUS

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			/*
			 * Detach now; ipsec_detach() will be called asynchronously once
			 * the I/O reference count drops to 0. There we will invoke
			 * ifnet_release().
			 */
			if ((result = ifnet_detach(ifp)) != 0) {
				printf("ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
			}
		}
	} else {
		// Bound, but not connected
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		ipsec_free_pcb(pcb, false);
	}

	return 0;
}
2511
2512 static errno_t
2513 ipsec_ctl_send(__unused kern_ctl_ref kctlref,
2514 __unused u_int32_t unit,
2515 __unused void *unitinfo,
2516 mbuf_t m,
2517 __unused int flags)
2518 {
2519 /* Receive messages from the control socket. Currently unused. */
2520 mbuf_freem(m);
2521 return 0;
2522 }
2523
2524 static errno_t
2525 ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,
2526 __unused u_int32_t unit,
2527 void *unitinfo,
2528 int opt,
2529 void *data,
2530 size_t len)
2531 {
2532 struct ipsec_pcb *pcb = unitinfo;
2533 errno_t result = 0;
2534
2535 /* check for privileges for privileged options */
2536 switch (opt) {
2537 case IPSEC_OPT_FLAGS:
2538 case IPSEC_OPT_EXT_IFDATA_STATS:
2539 case IPSEC_OPT_SET_DELEGATE_INTERFACE:
2540 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
2541 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
2542 return EPERM;
2543 }
2544 break;
2545 }
2546
2547 switch (opt) {
2548 case IPSEC_OPT_FLAGS:
2549 if (len != sizeof(u_int32_t)) {
2550 result = EMSGSIZE;
2551 } else {
2552 pcb->ipsec_flags = *(u_int32_t *)data;
2553 }
2554 break;
2555
2556 case IPSEC_OPT_EXT_IFDATA_STATS:
2557 if (len != sizeof(int)) {
2558 result = EMSGSIZE;
2559 break;
2560 }
2561 if (pcb->ipsec_ifp == NULL) {
2562 // Only can set after connecting
2563 result = EINVAL;
2564 break;
2565 }
2566 pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
2567 break;
2568
2569 case IPSEC_OPT_INC_IFDATA_STATS_IN:
2570 case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
2571 struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
2572
2573 if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
2574 result = EINVAL;
2575 break;
2576 }
2577 if (pcb->ipsec_ifp == NULL) {
2578 // Only can set after connecting
2579 result = EINVAL;
2580 break;
2581 }
2582 if (!pcb->ipsec_ext_ifdata_stats) {
2583 result = EINVAL;
2584 break;
2585 }
2586 if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN)
2587 ifnet_stat_increment_in(pcb->ipsec_ifp, utsp->utsp_packets,
2588 utsp->utsp_bytes, utsp->utsp_errors);
2589 else
2590 ifnet_stat_increment_out(pcb->ipsec_ifp, utsp->utsp_packets,
2591 utsp->utsp_bytes, utsp->utsp_errors);
2592 break;
2593 }
2594
2595 case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
2596 ifnet_t del_ifp = NULL;
2597 char name[IFNAMSIZ];
2598
2599 if (len > IFNAMSIZ - 1) {
2600 result = EMSGSIZE;
2601 break;
2602 }
2603 if (pcb->ipsec_ifp == NULL) {
2604 // Only can set after connecting
2605 result = EINVAL;
2606 break;
2607 }
2608 if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */
2609 bcopy(data, name, len);
2610 name[len] = 0;
2611 result = ifnet_find_by_name(name, &del_ifp);
2612 }
2613 if (result == 0) {
2614 printf("%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
2615 __func__, pcb->ipsec_ifp->if_xname,
2616 del_ifp->if_xname);
2617
2618 result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
2619 if (del_ifp)
2620 ifnet_release(del_ifp);
2621 }
2622 break;
2623 }
2624
2625 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
2626 if (len != sizeof(int)) {
2627 result = EMSGSIZE;
2628 break;
2629 }
2630 if (pcb->ipsec_ifp == NULL) {
2631 // Only can set after connecting
2632 result = EINVAL;
2633 break;
2634 }
2635 mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
2636 if (output_service_class == MBUF_SC_UNSPEC) {
2637 pcb->ipsec_output_service_class = MBUF_SC_OAM;
2638 } else {
2639 pcb->ipsec_output_service_class = output_service_class;
2640 }
2641 printf("%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
2642 __func__, pcb->ipsec_ifp->if_xname,
2643 pcb->ipsec_output_service_class);
2644 break;
2645 }
2646
2647 #if IPSEC_NEXUS
2648 case IPSEC_OPT_ENABLE_CHANNEL: {
2649 if (len != sizeof(int)) {
2650 result = EMSGSIZE;
2651 break;
2652 }
2653 if (pcb->ipsec_ifp == NULL) {
2654 // Only can set after connecting
2655 result = EINVAL;
2656 break;
2657 }
2658 if (*(int *)data) {
2659 result = ipsec_enable_channel(pcb, current_proc());
2660 } else {
2661 result = ipsec_disable_channel(pcb);
2662 }
2663 break;
2664 }
2665
2666 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
2667 if (len != sizeof(int)) {
2668 result = EMSGSIZE;
2669 break;
2670 }
2671 if (pcb->ipsec_ifp == NULL) {
2672 // Only can set after connecting
2673 result = EINVAL;
2674 break;
2675 }
2676 if (!if_enable_netagent) {
2677 result = ENOTSUP;
2678 break;
2679 }
2680 if (*(int *)data) {
2681 if (!uuid_is_null(pcb->ipsec_nx.ms_agent)) {
2682 if_add_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.ms_agent);
2683 }
2684 } else {
2685 if (!uuid_is_null(pcb->ipsec_nx.ms_agent)) {
2686 if_delete_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.ms_agent);
2687 }
2688 }
2689 break;
2690 }
2691
2692 case IPSEC_OPT_INPUT_FRAG_SIZE: {
2693 if (len != sizeof(u_int32_t)) {
2694 result = EMSGSIZE;
2695 break;
2696 }
2697 u_int32_t input_frag_size = *(u_int32_t *)data;
2698 if (input_frag_size <= sizeof(struct ip6_hdr)) {
2699 pcb->ipsec_frag_size_set = FALSE;
2700 pcb->ipsec_input_frag_size = 0;
2701 } else {
2702 printf("SET FRAG SIZE TO %u\n", input_frag_size);
2703 pcb->ipsec_frag_size_set = TRUE;
2704 pcb->ipsec_input_frag_size = input_frag_size;
2705 }
2706 break;
2707 }
2708 case IPSEC_OPT_ENABLE_NETIF: {
2709 if (len != sizeof(int)) {
2710 result = EMSGSIZE;
2711 break;
2712 }
2713 if (pcb->ipsec_ifp != NULL) {
2714 // Only can set before connecting
2715 result = EINVAL;
2716 break;
2717 }
2718 pcb->ipsec_use_netif = true;
2719 break;
2720 }
2721 case IPSEC_OPT_SLOT_SIZE: {
2722 if (len != sizeof(u_int32_t)) {
2723 result = EMSGSIZE;
2724 break;
2725 }
2726 if (pcb->ipsec_ifp != NULL) {
2727 // Only can set before connecting
2728 result = EINVAL;
2729 break;
2730 }
2731 u_int32_t slot_size = *(u_int32_t *)data;
2732 if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
2733 slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
2734 return (EINVAL);
2735 }
2736 pcb->ipsec_slot_size = slot_size;
2737 break;
2738 }
2739 case IPSEC_OPT_NETIF_RING_SIZE: {
2740 if (len != sizeof(u_int32_t)) {
2741 result = EMSGSIZE;
2742 break;
2743 }
2744 if (pcb->ipsec_ifp != NULL) {
2745 // Only can set before connecting
2746 result = EINVAL;
2747 break;
2748 }
2749 u_int32_t ring_size = *(u_int32_t *)data;
2750 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
2751 ring_size > IPSEC_IF_MAX_RING_SIZE) {
2752 return (EINVAL);
2753 }
2754 pcb->ipsec_netif_ring_size = ring_size;
2755 break;
2756 }
2757 case IPSEC_OPT_TX_FSW_RING_SIZE: {
2758 if (len != sizeof(u_int32_t)) {
2759 result = EMSGSIZE;
2760 break;
2761 }
2762 if (pcb->ipsec_ifp != NULL) {
2763 // Only can set before connecting
2764 result = EINVAL;
2765 break;
2766 }
2767 u_int32_t ring_size = *(u_int32_t *)data;
2768 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
2769 ring_size > IPSEC_IF_MAX_RING_SIZE) {
2770 return (EINVAL);
2771 }
2772 pcb->ipsec_tx_fsw_ring_size = ring_size;
2773 break;
2774 }
2775 case IPSEC_OPT_RX_FSW_RING_SIZE: {
2776 if (len != sizeof(u_int32_t)) {
2777 result = EMSGSIZE;
2778 break;
2779 }
2780 if (pcb->ipsec_ifp != NULL) {
2781 // Only can set before connecting
2782 result = EINVAL;
2783 break;
2784 }
2785 u_int32_t ring_size = *(u_int32_t *)data;
2786 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
2787 ring_size > IPSEC_IF_MAX_RING_SIZE) {
2788 return (EINVAL);
2789 }
2790 pcb->ipsec_rx_fsw_ring_size = ring_size;
2791 break;
2792 }
2793
2794 #endif // IPSEC_NEXUS
2795
2796 default:
2797 result = ENOPROTOOPT;
2798 break;
2799 }
2800
2801 return result;
2802 }
2803
2804 static errno_t
2805 ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
2806 __unused u_int32_t unit,
2807 void *unitinfo,
2808 int opt,
2809 void *data,
2810 size_t *len)
2811 {
2812 struct ipsec_pcb *pcb = unitinfo;
2813 errno_t result = 0;
2814
2815 switch (opt) {
2816 case IPSEC_OPT_FLAGS: {
2817 if (*len != sizeof(u_int32_t)) {
2818 result = EMSGSIZE;
2819 } else {
2820 *(u_int32_t *)data = pcb->ipsec_flags;
2821 }
2822 break;
2823 }
2824
2825 case IPSEC_OPT_EXT_IFDATA_STATS: {
2826 if (*len != sizeof(int)) {
2827 result = EMSGSIZE;
2828 } else {
2829 *(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
2830 }
2831 break;
2832 }
2833
2834 case IPSEC_OPT_IFNAME: {
2835 if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) {
2836 result = EMSGSIZE;
2837 } else {
2838 if (pcb->ipsec_ifp == NULL) {
2839 // Only can get after connecting
2840 result = EINVAL;
2841 break;
2842 }
2843 *len = snprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
2844 }
2845 break;
2846 }
2847
2848 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
2849 if (*len != sizeof(int)) {
2850 result = EMSGSIZE;
2851 } else {
2852 *(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
2853 }
2854 break;
2855 }
2856
2857 #if IPSEC_NEXUS
2858 case IPSEC_OPT_GET_CHANNEL_UUID: {
2859 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2860 if (uuid_is_null(pcb->ipsec_kpipe_uuid)) {
2861 result = ENXIO;
2862 } else if (*len != sizeof(uuid_t)) {
2863 result = EMSGSIZE;
2864 } else {
2865 uuid_copy(data, pcb->ipsec_kpipe_uuid);
2866 }
2867 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2868 break;
2869 }
2870
2871 case IPSEC_OPT_INPUT_FRAG_SIZE: {
2872 if (*len != sizeof(u_int32_t)) {
2873 result = EMSGSIZE;
2874 } else {
2875 *(u_int32_t *)data = pcb->ipsec_input_frag_size;
2876 }
2877 break;
2878 }
2879 case IPSEC_OPT_SLOT_SIZE: {
2880 if (*len != sizeof(u_int32_t)) {
2881 result = EMSGSIZE;
2882 } else {
2883 *(u_int32_t *)data = pcb->ipsec_slot_size;
2884 }
2885 break;
2886 }
2887 case IPSEC_OPT_NETIF_RING_SIZE: {
2888 if (*len != sizeof(u_int32_t)) {
2889 result = EMSGSIZE;
2890 } else {
2891 *(u_int32_t *)data = pcb->ipsec_netif_ring_size;
2892 }
2893 break;
2894 }
2895 case IPSEC_OPT_TX_FSW_RING_SIZE: {
2896 if (*len != sizeof(u_int32_t)) {
2897 result = EMSGSIZE;
2898 } else {
2899 *(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
2900 }
2901 break;
2902 }
2903 case IPSEC_OPT_RX_FSW_RING_SIZE: {
2904 if (*len != sizeof(u_int32_t)) {
2905 result = EMSGSIZE;
2906 } else {
2907 *(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
2908 }
2909 break;
2910 }
2911
2912 #endif // IPSEC_NEXUS
2913
2914 default: {
2915 result = ENOPROTOOPT;
2916 break;
2917 }
2918 }
2919
2920 return result;
2921 }
2922
2923 /* Network Interface functions */
/*
 * Legacy (non-netif) output path for the ipsec interface.  Applies the
 * IPsec transform to one outbound IPv4/IPv6 packet and re-injects the
 * result into ip_output()/ip6_output(), bound to the SA's outgoing
 * interface when one is recorded.
 *
 * Returns 0 on success (including when the packet was consumed by
 * cross-family tunneling), EINVAL for looping or unknown-version
 * packets, ENOBUFS when flow control was advised.  On error the mbuf is
 * freed here.
 */
static errno_t
ipsec_output(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	struct ipsec_output_state ipsec_state;
	struct route ro;
	struct route_in6 ro6;
	int length;
	struct ip *ip;
	struct ip6_hdr *ip6;
	struct ip_out_args ipoa;
	struct ip6_out_args ip6oa;
	int error = 0;
	u_int ip_version = 0;
	int flags = 0;
	struct flowadv *adv = NULL;

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = EINVAL;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	/* assumes the first mbuf holds at least the IP header — TODO confirm caller contract */
	ip = mtod(data, struct ip *);
	ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
#if IPSEC_NEXUS
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Apply encryption */
		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec4_interface_output(&ipsec_state, interface);
		/* Tunneled in IPv6 - packet is gone */
		if (error == 0 && ipsec_state.tunneled == 6) {
			goto done;
		}

		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				printf("ipsec_output: ipsec4_output error %d.\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
		data->m_pkthdr.pkt_proto = ip->ip_p;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Flip endian-ness for ip_output */
		ip = mtod(data, struct ip *);
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, length, 0);

		/* Send to ip_output */
		memset(&ro, 0, sizeof(ro));

		flags = (IP_OUTARGS |	/* Passing out args to specify interface */
		    IP_NOIPSEC);	/* To ensure the packet doesn't go through ipsec twice */

		memset(&ipoa, 0, sizeof(ipoa));
		ipoa.ipoa_flowadv.code = 0;
		ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ipoa.ipoa_boundif = ipsec_state.outgoing_if;
			ipoa.ipoa_flags |= IPOAF_BOUND_IF;
		}
		ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);

		adv = &ipoa.ipoa_flowadv;

		(void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
		data = NULL;

		/* Propagate flow-control advice back to this interface. */
		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	case 6: {
#if IPSEC_NEXUS
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET6;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Isolate the IPv6 header in its own mbuf before the transform. */
		data = ipsec6_splithdr(data);
		if (data == NULL) {
			printf("ipsec_output: ipsec6_splithdr returned NULL\n");
			goto ipsec_output_err;
		}

		ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) {	/* tunneled in IPv4 - packet is gone */
			goto done;
		}
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				printf("ipsec_output: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
		/* NOTE(review): ip6 still points at the pre-transform header mbuf —
		 * confirm it remains valid after ipsec6_interface_output(). */
		data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, length, 0);

		/* Send to ip6_output */
		memset(&ro6, 0, sizeof(ro6));

		flags = IPV6_OUTARGS;

		memset(&ip6oa, 0, sizeof(ip6oa));
		ip6oa.ip6oa_flowadv.code = 0;
		ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
			ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
		}
		ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);

		adv = &ip6oa.ip6oa_flowadv;

		(void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
		data = NULL;

		/* Propagate flow-control advice back to this interface. */
		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	default: {
		printf("ipsec_output: Received unknown packet version %d.\n", ip_version);
		error = EINVAL;
		goto ipsec_output_err;
	}
	}

done:
	return error;

ipsec_output_err:
	if (data)
		mbuf_freem(data);
	goto done;
}
3113
3114 static void
3115 ipsec_start(ifnet_t interface)
3116 {
3117 mbuf_t data;
3118 struct ipsec_pcb *pcb = ifnet_softc(interface);
3119
3120 VERIFY(pcb != NULL);
3121 for (;;) {
3122 if (ifnet_dequeue(interface, &data) != 0)
3123 break;
3124 if (ipsec_output(interface, data) != 0)
3125 break;
3126 }
3127 }
3128
3129 /* Network Interface functions */
3130 static errno_t
3131 ipsec_demux(__unused ifnet_t interface,
3132 mbuf_t data,
3133 __unused char *frame_header,
3134 protocol_family_t *protocol)
3135 {
3136 struct ip *ip;
3137 u_int ip_version;
3138
3139 while (data != NULL && mbuf_len(data) < 1) {
3140 data = mbuf_next(data);
3141 }
3142
3143 if (data == NULL)
3144 return ENOENT;
3145
3146 ip = mtod(data, struct ip *);
3147 ip_version = ip->ip_v;
3148
3149 switch(ip_version) {
3150 case 4:
3151 *protocol = PF_INET;
3152 return 0;
3153 case 6:
3154 *protocol = PF_INET6;
3155 return 0;
3156 default:
3157 break;
3158 }
3159
3160 return 0;
3161 }
3162
3163 static errno_t
3164 ipsec_add_proto(__unused ifnet_t interface,
3165 protocol_family_t protocol,
3166 __unused const struct ifnet_demux_desc *demux_array,
3167 __unused u_int32_t demux_count)
3168 {
3169 switch(protocol) {
3170 case PF_INET:
3171 return 0;
3172 case PF_INET6:
3173 return 0;
3174 default:
3175 break;
3176 }
3177
3178 return ENOPROTOOPT;
3179 }
3180
3181 static errno_t
3182 ipsec_del_proto(__unused ifnet_t interface,
3183 __unused protocol_family_t protocol)
3184 {
3185 return 0;
3186 }
3187
3188 static errno_t
3189 ipsec_ioctl(ifnet_t interface,
3190 u_long command,
3191 void *data)
3192 {
3193 struct ipsec_pcb *pcb = ifnet_softc(interface);
3194 errno_t result = 0;
3195
3196 switch(command) {
3197 case SIOCSIFMTU: {
3198 #if IPSEC_NEXUS
3199 if (pcb->ipsec_use_netif) {
3200 // Make sure we can fit packets in the channel buffers
3201 if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
3202 result = EINVAL;
3203 } else {
3204 ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
3205 }
3206 } else
3207 #endif // IPSEC_NEXUS
3208 {
3209 ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
3210 }
3211 break;
3212 }
3213
3214 case SIOCSIFFLAGS:
3215 /* ifioctl() takes care of it */
3216 break;
3217
3218 default:
3219 result = EOPNOTSUPP;
3220 }
3221
3222 return result;
3223 }
3224
3225 static void
3226 ipsec_detached(ifnet_t interface)
3227 {
3228 struct ipsec_pcb *pcb = ifnet_softc(interface);
3229 (void)ifnet_release(interface);
3230 ipsec_free_pcb(pcb, true);
3231 }
3232
3233 /* Protocol Handlers */
3234
3235 static errno_t
3236 ipsec_proto_input(ifnet_t interface,
3237 protocol_family_t protocol,
3238 mbuf_t m,
3239 __unused char *frame_header)
3240 {
3241 mbuf_pkthdr_setrcvif(m, interface);
3242
3243 #if IPSEC_NEXUS
3244 struct ipsec_pcb *pcb = ifnet_softc(interface);
3245 if (!pcb->ipsec_use_netif)
3246 #endif // IPSEC_NEXUS
3247 {
3248 uint32_t af = 0;
3249 struct ip *ip = mtod(m, struct ip *);
3250 if (ip->ip_v == 4) {
3251 af = AF_INET;
3252 } else if (ip->ip_v == 6) {
3253 af = AF_INET6;
3254 }
3255 bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
3256 }
3257 pktap_input(interface, protocol, m, NULL);
3258
3259 if (proto_input(protocol, m) != 0) {
3260 ifnet_stat_increment_in(interface, 0, 0, 1);
3261 m_freem(m);
3262 } else {
3263 ifnet_stat_increment_in(interface, 1, m->m_pkthdr.len, 0);
3264 }
3265
3266 return 0;
3267 }
3268
3269 static errno_t
3270 ipsec_proto_pre_output(__unused ifnet_t interface,
3271 protocol_family_t protocol,
3272 __unused mbuf_t *packet,
3273 __unused const struct sockaddr *dest,
3274 __unused void *route,
3275 __unused char *frame_type,
3276 __unused char *link_layer_dest)
3277 {
3278
3279 *(protocol_family_t *)(void *)frame_type = protocol;
3280 return 0;
3281 }
3282
3283 static errno_t
3284 ipsec_attach_proto(ifnet_t interface,
3285 protocol_family_t protocol)
3286 {
3287 struct ifnet_attach_proto_param proto;
3288 errno_t result;
3289
3290 bzero(&proto, sizeof(proto));
3291 proto.input = ipsec_proto_input;
3292 proto.pre_output = ipsec_proto_pre_output;
3293
3294 result = ifnet_attach_protocol(interface, protocol, &proto);
3295 if (result != 0 && result != EEXIST) {
3296 printf("ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
3297 protocol, result);
3298 }
3299
3300 return result;
3301 }
3302
/*
 * Inject a packet (or chain of packets) into the interface's inbound path.
 *
 * Netif (nexus) mode: append the chain to the pcb's pending input chain
 * under the chain mutex, then kick the RX ring so the channel drains it.
 * Legacy mode: demux the packet's family and feed it straight to
 * ipsec_proto_input().
 *
 * Lock ordering: the pcb rwlock (shared) is taken around the chain mutex
 * and the rxring read; the mutex is dropped before notifying the ring.
 */
errno_t
ipsec_inject_inbound_packet(ifnet_t interface,
							mbuf_t packet)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);

#if IPSEC_NEXUS
	if (pcb->ipsec_use_netif) {
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		lck_mtx_lock(&pcb->ipsec_input_chain_lock);
		/* Splice the new chain onto the tail of the pending input chain. */
		if (pcb->ipsec_input_chain != NULL) {
			pcb->ipsec_input_chain_last->m_nextpkt = packet;
		} else {
			pcb->ipsec_input_chain = packet;
		}
		/* Walk to the end of the injected chain to find the new tail. */
		while (packet->m_nextpkt) {
			VERIFY(packet != packet->m_nextpkt);
			packet = packet->m_nextpkt;
		}
		pcb->ipsec_input_chain_last = packet;
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		/* Snapshot the ring while still holding the pcb rwlock. */
		kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring;
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		/* Wake the channel outside all locks. */
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		return (0);
	} else
#endif // IPSEC_NEXUS
	{
		errno_t error;
		protocol_family_t protocol;
		/* Determine AF_INET/AF_INET6 from the IP version nibble. */
		if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
			return error;
		}

		return ipsec_proto_input(interface, protocol, packet, NULL);
	}
}
3346
3347 void
3348 ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family)
3349 {
3350 if (packet != NULL && interface != NULL) {
3351 struct ipsec_pcb *pcb = ifnet_softc(interface);
3352 if (pcb != NULL) {
3353 /* Set traffic class, set flow */
3354 m_set_service_class(packet, pcb->ipsec_output_service_class);
3355 packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
3356 packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
3357 if (family == AF_INET) {
3358 struct ip *ip = mtod(packet, struct ip *);
3359 packet->m_pkthdr.pkt_proto = ip->ip_p;
3360 } else if (family == AF_INET6) {
3361 struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
3362 packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
3363 }
3364 packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
3365 }
3366 }
3367 }
3368
3369 void
3370 ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
3371 {
3372 struct ipsec_pcb *pcb;
3373
3374 if (interface == NULL || ipoa == NULL)
3375 return;
3376 pcb = ifnet_softc(interface);
3377
3378 if (net_qos_policy_restricted == 0) {
3379 ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
3380 ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
3381 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
3382 net_qos_policy_restrict_avapps != 0) {
3383 ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
3384 } else {
3385 ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED;
3386 ipoa->ipoa_sotc = SO_TC_VO;
3387 }
3388 }
3389
3390 void
3391 ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
3392 {
3393 struct ipsec_pcb *pcb;
3394
3395 if (interface == NULL || ip6oa == NULL)
3396 return;
3397 pcb = ifnet_softc(interface);
3398
3399 if (net_qos_policy_restricted == 0) {
3400 ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED;
3401 ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
3402 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
3403 net_qos_policy_restrict_avapps != 0) {
3404 ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
3405 } else {
3406 ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
3407 ip6oa->ip6oa_sotc = SO_TC_VO;
3408 }
3409 }