]> git.saurik.com Git - apple/xnu.git/blob - bsd/net/if_ipsec.c
8ffac41fe2e6caa6d5b3d94c2f92d074493a843f
[apple/xnu.git] / bsd / net / if_ipsec.c
1 /*
2 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/systm.h>
30 #include <sys/kern_control.h>
31 #include <net/kpi_protocol.h>
32 #include <net/kpi_interface.h>
33 #include <sys/socket.h>
34 #include <sys/socketvar.h>
35 #include <net/if.h>
36 #include <net/if_types.h>
37 #include <net/bpf.h>
38 #include <net/if_ipsec.h>
39 #include <sys/mbuf.h>
40 #include <sys/sockio.h>
41 #include <netinet/in.h>
42 #include <netinet/ip6.h>
43 #include <netinet6/in6_var.h>
44 #include <netinet6/ip6_var.h>
45 #include <sys/kauth.h>
46 #include <netinet6/ipsec.h>
47 #include <netinet6/ipsec6.h>
48 #include <netinet6/esp.h>
49 #include <netinet6/esp6.h>
50 #include <netinet/ip.h>
51 #include <net/flowadv.h>
52 #include <net/necp.h>
53 #include <netkey/key.h>
54 #include <net/pktap.h>
55 #include <kern/zalloc.h>
56
/*
 * Compile-time switch for the nexus/channel data path. It is defined as 0
 * here, so every `#if IPSEC_NEXUS` section of this file is compiled out and
 * the `#if !IPSEC_NEXUS` sections are compiled in.
 */
#define IPSEC_NEXUS 0

/* Variables defined outside this file. */
extern int net_qos_policy_restricted;
extern int net_qos_policy_restrict_avapps;
extern unsigned int if_enable_netagent;
62
/*
 * Kernel Control functions.
 * These are installed as the connect/disconnect/send/setopt/getopt callbacks
 * of the kern_ctl_reg passed to ctl_register() in ipsec_register_control().
 */
static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
	void **unitinfo);
static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
	void *unitinfo);
static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
	void *unitinfo, mbuf_t m, int flags);
static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
	int opt, void *data, size_t *len);
static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
	int opt, void *data, size_t len);

/* Network Interface functions */
#if !IPSEC_NEXUS
/* Only declared when the nexus data path is compiled out. */
static void ipsec_start(ifnet_t interface);
#endif // !IPSEC_NEXUS
static errno_t ipsec_output(ifnet_t interface, mbuf_t data);
static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
	protocol_family_t *protocol);
static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
	const struct ifnet_demux_desc *demux_array,
	u_int32_t demux_count);
static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
static void ipsec_detached(ifnet_t interface);

/*
 * Protocol handlers.
 * ipsec_attach_proto is the plumber registered for PF_INET and PF_INET6 in
 * ipsec_register_control().
 */
static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
	mbuf_t m, char *frame_header);
static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
	mbuf_t *packet, const struct sockaddr *dest, void *route,
	char *frame_type, char *link_layer_dest);
96
/* Kernel control reference filled in by ctl_register() in ipsec_register_control(). */
static kern_ctl_ref ipsec_kctlref;
/* Interface family id obtained from mbuf_tag_id_find(IPSEC_CONTROL_NAME, ...). */
static u_int32_t ipsec_family;
/* Lock attribute/group objects and the global mutex, all set up at the end of ipsec_register_control(). */
static lck_attr_t *ipsec_lck_attr;
static lck_grp_attr_t *ipsec_lck_grp_attr;
static lck_grp_t *ipsec_lck_grp;
static lck_mtx_t ipsec_lock;
103
104 #if IPSEC_NEXUS
105
/* net.ipsec sysctl node and the knob that arms IPSEC_IF_VERIFY(). */
SYSCTL_DECL(_net_ipsec);
SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
static int if_ipsec_verify_interface_creation = 0;
SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");

/*
 * Evaluates VERIFY(_e) only when if_ipsec_verify_interface_creation is non-zero.
 * NOTE(review): this expands to a bare `if` statement (no do { } while (0)
 * wrapper), so it must not be used as the unbraced body of an if/else.
 */
#define IPSEC_IF_VERIFY(_e) if (unlikely(if_ipsec_verify_interface_creation)) { VERIFY(_e); }

/* Defaults used for the slot size and the ring-size sysctls below. */
#define IPSEC_IF_DEFAULT_SLOT_SIZE 4096
#define IPSEC_IF_DEFAULT_RING_SIZE 64
#define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
#define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128

/* Inclusive bounds enforced by the ring-size sysctl handlers. */
#define IPSEC_IF_MIN_RING_SIZE 16
#define IPSEC_IF_MAX_RING_SIZE 1024

/* Handlers that range-check writes to the ring-size sysctls. */
static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;

/* Current ring sizes; only updated by the handlers above. */
static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;

SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
	&if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
	&if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
	&if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");
135
/* Registers the "com.apple.ipsec" nexus domain provider; defined further down. */
static errno_t
ipsec_register_nexus(void);

/*
 * Set of UUIDs kept per interface for its nexus objects.
 * NOTE(review): the if_ and ms_ prefixes presumably stand for the netif and
 * multistack nexus respectively; confirm against the code that fills them in.
 */
typedef struct ipsec_nx {
	uuid_t if_provider;
	uuid_t if_instance;
	uuid_t ms_provider;
	uuid_t ms_instance;
	uuid_t ms_device;
	uuid_t ms_host;
	uuid_t ms_agent;
} *ipsec_nx_t;

/* File-wide nexus controller state; not touched in the visible part of this file. */
static nexus_controller_t ipsec_ncd;
static int ipsec_ncd_refcount;
static uuid_t ipsec_kpipe_uuid;
152
153 #endif // IPSEC_NEXUS
154
/* Control block allocated for each kernel control connection */
struct ipsec_pcb {
	/* Linkage for a TAILQ of ipsec_pcb entries (see ipsec_head). */
	TAILQ_ENTRY(ipsec_pcb) ipsec_chain;
	kern_ctl_ref ipsec_ctlref;
	/* Network interface that belongs to this control block. */
	ifnet_t ipsec_ifp;
	/* Kernel control unit; 0 once the control has disconnected (see ipsec_interface_isvalid()). */
	u_int32_t ipsec_unit;
	u_int32_t ipsec_unique_id;
	u_int32_t ipsec_flags;
	u_int32_t ipsec_input_frag_size;
	bool ipsec_frag_size_set;
	/* When non-zero, the kpipe RX sync path does not bump the ifnet output counters itself. */
	int ipsec_ext_ifdata_stats;
	mbuf_svc_class_t ipsec_output_service_class;
	char ipsec_if_xname[IFXNAMSIZ];
	char ipsec_unique_name[IFXNAMSIZ];
	// PCB lock protects state fields, like ipsec_kpipe_enabled
	decl_lck_rw_data(, ipsec_pcb_lock);
	/* True while ifnet output is disabled; cleared after ifnet_enable_output() succeeds. */
	bool ipsec_output_disabled;

#if IPSEC_NEXUS
	// Input chain lock protects the list of input mbufs
	// The input chain lock must be taken AFTER the PCB lock if both are held
	lck_mtx_t ipsec_input_chain_lock;
	struct mbuf * ipsec_input_chain;
	struct mbuf * ipsec_input_chain_last;
	struct ipsec_nx ipsec_nx;
	/* Non-zero when packets are exchanged through the kernel pipe instead of the BSD stack. */
	int ipsec_kpipe_enabled;
	uuid_t ipsec_kpipe_uuid;
	/* Kernel pipe rings; set in ipsec_kpipe_ring_init(), cleared in ipsec_kpipe_ring_fini(). */
	void * ipsec_kpipe_rxring;
	void * ipsec_kpipe_txring;

	/* Netif nexus and its rings; set in ipsec_netif_prepare() / ipsec_netif_ring_init(). */
	kern_nexus_t ipsec_netif_nexus;
	void * ipsec_netif_rxring;
	void * ipsec_netif_txring;
	uint64_t ipsec_netif_txring_size;
#endif // IPSEC_NEXUS
};
191
/* List head for ipsec_pcb entries; initialised in ipsec_register_control(). */
TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;

/* The pcb zone is created with room for IPSEC_PCB_ZONE_MAX elements. */
#define IPSEC_PCB_ZONE_MAX 32
#define IPSEC_PCB_ZONE_NAME "net.if_ipsec"

static unsigned int ipsec_pcb_size; /* size of zone element */
static struct zone *ipsec_pcb_zone; /* zone for ipsec_pcb */

#define IPSECQ_MAXLEN 256
201
202 #if IPSEC_NEXUS
203 static int
204 sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
205 {
206 #pragma unused(arg1, arg2)
207 int value = if_ipsec_ring_size;
208
209 int error = sysctl_handle_int(oidp, &value, 0, req);
210 if (error || !req->newptr) {
211 return (error);
212 }
213
214 if (value < IPSEC_IF_MIN_RING_SIZE ||
215 value > IPSEC_IF_MAX_RING_SIZE) {
216 return (EINVAL);
217 }
218
219 if_ipsec_ring_size = value;
220
221 return (0);
222 }
223
224 static int
225 sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
226 {
227 #pragma unused(arg1, arg2)
228 int value = if_ipsec_tx_fsw_ring_size;
229
230 int error = sysctl_handle_int(oidp, &value, 0, req);
231 if (error || !req->newptr) {
232 return (error);
233 }
234
235 if (value < IPSEC_IF_MIN_RING_SIZE ||
236 value > IPSEC_IF_MAX_RING_SIZE) {
237 return (EINVAL);
238 }
239
240 if_ipsec_tx_fsw_ring_size = value;
241
242 return (0);
243 }
244
245 static int
246 sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
247 {
248 #pragma unused(arg1, arg2)
249 int value = if_ipsec_rx_fsw_ring_size;
250
251 int error = sysctl_handle_int(oidp, &value, 0, req);
252 if (error || !req->newptr) {
253 return (error);
254 }
255
256 if (value < IPSEC_IF_MIN_RING_SIZE ||
257 value > IPSEC_IF_MAX_RING_SIZE) {
258 return (EINVAL);
259 }
260
261 if_ipsec_rx_fsw_ring_size = value;
262
263 return (0);
264 }
265 #endif // IPSEC_NEXUS
266
267 errno_t
268 ipsec_register_control(void)
269 {
270 struct kern_ctl_reg kern_ctl;
271 errno_t result = 0;
272
273 /* Find a unique value for our interface family */
274 result = mbuf_tag_id_find(IPSEC_CONTROL_NAME, &ipsec_family);
275 if (result != 0) {
276 printf("ipsec_register_control - mbuf_tag_id_find_internal failed: %d\n", result);
277 return result;
278 }
279
280 ipsec_pcb_size = sizeof(struct ipsec_pcb);
281 ipsec_pcb_zone = zinit(ipsec_pcb_size,
282 IPSEC_PCB_ZONE_MAX * ipsec_pcb_size,
283 0, IPSEC_PCB_ZONE_NAME);
284 if (ipsec_pcb_zone == NULL) {
285 printf("ipsec_register_control - zinit(ipsec_pcb) failed");
286 return ENOMEM;
287 }
288
289 #if IPSEC_NEXUS
290 ipsec_register_nexus();
291 #endif // IPSEC_NEXUS
292
293 TAILQ_INIT(&ipsec_head);
294
295 bzero(&kern_ctl, sizeof(kern_ctl));
296 strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
297 kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
298 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; /* Require root */
299 kern_ctl.ctl_sendsize = 64 * 1024;
300 kern_ctl.ctl_recvsize = 64 * 1024;
301 kern_ctl.ctl_connect = ipsec_ctl_connect;
302 kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
303 kern_ctl.ctl_send = ipsec_ctl_send;
304 kern_ctl.ctl_setopt = ipsec_ctl_setopt;
305 kern_ctl.ctl_getopt = ipsec_ctl_getopt;
306
307 result = ctl_register(&kern_ctl, &ipsec_kctlref);
308 if (result != 0) {
309 printf("ipsec_register_control - ctl_register failed: %d\n", result);
310 return result;
311 }
312
313 /* Register the protocol plumbers */
314 if ((result = proto_register_plumber(PF_INET, ipsec_family,
315 ipsec_attach_proto, NULL)) != 0) {
316 printf("ipsec_register_control - proto_register_plumber(PF_INET, %d) failed: %d\n",
317 ipsec_family, result);
318 ctl_deregister(ipsec_kctlref);
319 return result;
320 }
321
322 /* Register the protocol plumbers */
323 if ((result = proto_register_plumber(PF_INET6, ipsec_family,
324 ipsec_attach_proto, NULL)) != 0) {
325 proto_unregister_plumber(PF_INET, ipsec_family);
326 ctl_deregister(ipsec_kctlref);
327 printf("ipsec_register_control - proto_register_plumber(PF_INET6, %d) failed: %d\n",
328 ipsec_family, result);
329 return result;
330 }
331
332 ipsec_lck_attr = lck_attr_alloc_init();
333 ipsec_lck_grp_attr = lck_grp_attr_alloc_init();
334 ipsec_lck_grp = lck_grp_alloc_init("ipsec", ipsec_lck_grp_attr);
335 lck_mtx_init(&ipsec_lock, ipsec_lck_grp, ipsec_lck_attr);
336
337 return 0;
338 }
339
340 /* Helpers */
341 int
342 ipsec_interface_isvalid (ifnet_t interface)
343 {
344 struct ipsec_pcb *pcb = NULL;
345
346 if (interface == NULL)
347 return 0;
348
349 pcb = ifnet_softc(interface);
350
351 if (pcb == NULL)
352 return 0;
353
354 /* When ctl disconnects, ipsec_unit is set to 0 */
355 if (pcb->ipsec_unit == 0)
356 return 0;
357
358 return 1;
359 }
360
361 static errno_t
362 ipsec_ifnet_set_attrs(ifnet_t ifp)
363 {
364 /* Set flags and additional information. */
365 ifnet_set_mtu(ifp, 1500);
366 ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
367
368 /* The interface must generate its own IPv6 LinkLocal address,
369 * if possible following the recommendation of RFC2472 to the 64bit interface ID
370 */
371 ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
372
373 #if !IPSEC_NEXUS
374 /* Reset the stats in case as the interface may have been recycled */
375 struct ifnet_stats_param stats;
376 bzero(&stats, sizeof(struct ifnet_stats_param));
377 ifnet_set_stat(ifp, &stats);
378 #endif // !IPSEC_NEXUS
379
380 return (0);
381 }
382
383 #if IPSEC_NEXUS
384
385 static uuid_t ipsec_nx_dom_prov;
386
387 static errno_t
388 ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
389 {
390 return 0;
391 }
392
393 static void
394 ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
395 {
396 // Ignore
397 }
398
399 static errno_t
400 ipsec_register_nexus(void)
401 {
402 const struct kern_nexus_domain_provider_init dp_init = {
403 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
404 .nxdpi_flags = 0,
405 .nxdpi_init = ipsec_nxdp_init,
406 .nxdpi_fini = ipsec_nxdp_fini
407 };
408 errno_t err = 0;
409
410 /* ipsec_nxdp_init() is called before this function returns */
411 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
412 (const uint8_t *) "com.apple.ipsec",
413 &dp_init, sizeof(dp_init),
414 &ipsec_nx_dom_prov);
415 if (err != 0) {
416 printf("%s: failed to register domain provider\n", __func__);
417 return (err);
418 }
419 return (0);
420 }
421
422 static errno_t
423 ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
424 {
425 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
426 pcb->ipsec_netif_nexus = nexus;
427 return (ipsec_ifnet_set_attrs(ifp));
428 }
429
430 static errno_t
431 ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
432 proc_t p, kern_nexus_t nexus,
433 nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
434 {
435 #pragma unused(nxprov, p)
436 #pragma unused(nexus, nexus_port, channel, ch_ctx)
437 return (0);
438 }
439
440 static errno_t
441 ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
442 kern_channel_t channel)
443 {
444 #pragma unused(nxprov, channel)
445 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
446 boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
447 return (ok ? 0 : ENXIO);
448 }
449
450 static void
451 ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
452 kern_channel_t channel)
453 {
454 #pragma unused(nxprov, nexus, channel)
455 }
456
457 static void
458 ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
459 kern_channel_t channel)
460 {
461 #pragma unused(nxprov, nexus, channel)
462 }
463
464 static void
465 ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
466 kern_channel_t channel)
467 {
468 #pragma unused(nxprov, channel)
469 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
470 if (pcb->ipsec_netif_nexus == nexus) {
471 pcb->ipsec_netif_nexus = NULL;
472 }
473 ifnet_decr_iorefcnt(pcb->ipsec_ifp);
474 }
475
476 static errno_t
477 ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
478 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
479 void **ring_ctx)
480 {
481 #pragma unused(nxprov)
482 #pragma unused(channel)
483 #pragma unused(ring_ctx)
484 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
485 if (!is_tx_ring) {
486 VERIFY(pcb->ipsec_kpipe_rxring == NULL);
487 pcb->ipsec_kpipe_rxring = ring;
488 } else {
489 VERIFY(pcb->ipsec_kpipe_txring == NULL);
490 pcb->ipsec_kpipe_txring = ring;
491 }
492 return 0;
493 }
494
495 static void
496 ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
497 kern_channel_ring_t ring)
498 {
499 #pragma unused(nxprov)
500 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
501 if (pcb->ipsec_kpipe_rxring == ring) {
502 pcb->ipsec_kpipe_rxring = NULL;
503 } else if (pcb->ipsec_kpipe_txring == ring) {
504 pcb->ipsec_kpipe_txring = NULL;
505 }
506 }
507
508 static errno_t
509 ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
510 kern_channel_ring_t tx_ring, uint32_t flags)
511 {
512 #pragma unused(nxprov)
513 #pragma unused(flags)
514 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
515
516 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
517 int channel_enabled = pcb->ipsec_kpipe_enabled;
518 if (!channel_enabled) {
519 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
520 return 0;
521 }
522
523 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
524 if (tx_slot == NULL) {
525 // Nothing to write, bail
526 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
527 return 0;
528 }
529
530 // Signal the netif ring to read
531 kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring;
532 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
533
534 if (rx_ring != NULL) {
535 kern_channel_notify(rx_ring, 0);
536 }
537 return 0;
538 }
539
540 static mbuf_t
541 ipsec_encrypt_mbuf(ifnet_t interface,
542 mbuf_t data)
543 {
544 struct ipsec_output_state ipsec_state;
545 int error = 0;
546 uint32_t af;
547
548 // Make sure this packet isn't looping through the interface
549 if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
550 error = -1;
551 goto ipsec_output_err;
552 }
553
554 // Mark the interface so NECP can evaluate tunnel policy
555 necp_mark_packet_from_interface(data, interface);
556
557 struct ip *ip = mtod(data, struct ip *);
558 u_int ip_version = ip->ip_v;
559
560 switch (ip_version) {
561 case 4: {
562 af = AF_INET;
563
564 memset(&ipsec_state, 0, sizeof(ipsec_state));
565 ipsec_state.m = data;
566 ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
567 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
568
569 error = ipsec4_interface_output(&ipsec_state, interface);
570 if (error == 0 && ipsec_state.tunneled == 6) {
571 // Tunneled in IPv6 - packet is gone
572 // TODO: Don't lose mbuf
573 data = NULL;
574 goto done;
575 }
576
577 data = ipsec_state.m;
578 if (error || data == NULL) {
579 if (error) {
580 printf("ipsec_encrypt_mbuf: ipsec4_output error %d\n", error);
581 }
582 goto ipsec_output_err;
583 }
584 goto done;
585 }
586 case 6: {
587 af = AF_INET6;
588
589 data = ipsec6_splithdr(data);
590 if (data == NULL) {
591 printf("ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
592 goto ipsec_output_err;
593 }
594
595 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
596
597 memset(&ipsec_state, 0, sizeof(ipsec_state));
598 ipsec_state.m = data;
599 ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
600 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
601
602 error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
603 if (error == 0 && ipsec_state.tunneled == 4) {
604 // Tunneled in IPv4 - packet is gone
605 // TODO: Don't lose mbuf
606 data = NULL;
607 goto done;
608 }
609 data = ipsec_state.m;
610 if (error || data == NULL) {
611 if (error) {
612 printf("ipsec_encrypt_mbuf: ipsec6_output error %d\n", error);
613 }
614 goto ipsec_output_err;
615 }
616 goto done;
617 }
618 default: {
619 printf("ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
620 error = -1;
621 goto ipsec_output_err;
622 }
623 }
624
625 done:
626 return data;
627
628 ipsec_output_err:
629 if (data) {
630 mbuf_freem(data);
631 }
632 return NULL;
633 }
634
635 static errno_t
636 ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
637 kern_channel_ring_t rx_ring, uint32_t flags)
638 {
639 #pragma unused(nxprov)
640 #pragma unused(flags)
641 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
642 struct kern_channel_ring_stat_increment rx_ring_stats;
643
644 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
645
646 int channel_enabled = pcb->ipsec_kpipe_enabled;
647 if (!channel_enabled) {
648 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
649 return 0;
650 }
651
652 // Reclaim user-released slots
653 (void) kern_channel_reclaim(rx_ring);
654
655 uint32_t avail = kern_channel_available_slot_count(rx_ring);
656 if (avail == 0) {
657 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
658 return 0;
659 }
660
661 kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring;
662 if (tx_ring == NULL) {
663 // Net-If TX ring not set up yet, nothing to read
664 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
665 return 0;
666 }
667
668 struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;
669
670 // Unlock ipsec before entering ring
671 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
672
673 (void)kr_enter(tx_ring, TRUE);
674
675 // Lock again after entering and validate
676 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
677 if (tx_ring != pcb->ipsec_netif_txring) {
678 // Ring no longer valid
679 // Unlock first, then exit ring
680 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
681 kr_exit(tx_ring);
682 return 0;
683 }
684
685
686 struct kern_channel_ring_stat_increment tx_ring_stats;
687 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
688 kern_channel_slot_t tx_pslot = NULL;
689 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
690 if (tx_slot == NULL) {
691 // Nothing to read, don't bother signalling
692 // Unlock first, then exit ring
693 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
694 kr_exit(tx_ring);
695 return 0;
696 }
697
698 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
699 VERIFY(rx_pp != NULL);
700 bzero(&rx_ring_stats, sizeof(rx_ring_stats));
701 kern_channel_slot_t rx_pslot = NULL;
702 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
703
704 while (rx_slot != NULL && tx_slot != NULL) {
705 size_t length = 0;
706 mbuf_t data = NULL;
707 errno_t error = 0;
708
709 // Allocate rx packet
710 kern_packet_t rx_ph = 0;
711 error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
712 if (unlikely(error != 0)) {
713 printf("ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
714 pcb->ipsec_ifp->if_xname);
715 break;
716 }
717
718 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
719
720 // Advance TX ring
721 tx_pslot = tx_slot;
722 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
723
724 if (tx_ph == 0) {
725 continue;
726 }
727
728 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
729 VERIFY(tx_buf != NULL);
730 uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
731 VERIFY(tx_baddr != NULL);
732 tx_baddr += kern_buflet_get_data_offset(tx_buf);
733
734 bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
735
736 length = MIN(kern_packet_get_data_length(tx_ph),
737 IPSEC_IF_DEFAULT_SLOT_SIZE);
738
739 // Increment TX stats
740 tx_ring_stats.kcrsi_slots_transferred++;
741 tx_ring_stats.kcrsi_bytes_transferred += length;
742
743 if (length > 0) {
744 error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
745 if (error == 0) {
746 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
747 if (error == 0) {
748 // Encrypt and send packet
749 data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
750 } else {
751 printf("ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
752 STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
753 STATS_INC(nifs, NETIF_STATS_DROPPED);
754 mbuf_freem(data);
755 data = NULL;
756 }
757 } else {
758 printf("ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
759 STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
760 STATS_INC(nifs, NETIF_STATS_DROPPED);
761 }
762 } else {
763 printf("ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
764 STATS_INC(nifs, NETIF_STATS_BADLEN);
765 STATS_INC(nifs, NETIF_STATS_DROPPED);
766 }
767
768 if (data == NULL) {
769 printf("ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
770 kern_pbufpool_free(rx_pp, rx_ph);
771 break;
772 }
773
774 length = mbuf_pkthdr_len(data);
775 if (length > rx_pp->pp_buflet_size) {
776 // Flush data
777 mbuf_freem(data);
778 kern_pbufpool_free(rx_pp, rx_ph);
779 printf("ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
780 pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
781 continue;
782 }
783
784 // Fillout rx packet
785 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
786 VERIFY(rx_buf != NULL);
787 void *rx_baddr = kern_buflet_get_object_address(rx_buf);
788 VERIFY(rx_baddr != NULL);
789
790 // Copy-in data from mbuf to buflet
791 mbuf_copydata(data, 0, length, (void *)rx_baddr);
792 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
793
794 // Finalize and attach the packet
795 error = kern_buflet_set_data_offset(rx_buf, 0);
796 VERIFY(error == 0);
797 error = kern_buflet_set_data_length(rx_buf, length);
798 VERIFY(error == 0);
799 error = kern_packet_finalize(rx_ph);
800 VERIFY(error == 0);
801 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
802 VERIFY(error == 0);
803
804 STATS_INC(nifs, NETIF_STATS_TXPKTS);
805 STATS_INC(nifs, NETIF_STATS_TXCOPY_DIRECT);
806
807 rx_ring_stats.kcrsi_slots_transferred++;
808 rx_ring_stats.kcrsi_bytes_transferred += length;
809
810 if (!pcb->ipsec_ext_ifdata_stats) {
811 ifnet_stat_increment_out(pcb->ipsec_ifp, 1, length, 0);
812 }
813
814 mbuf_freem(data);
815
816 rx_pslot = rx_slot;
817 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
818 }
819
820 if (rx_pslot) {
821 kern_channel_advance_slot(rx_ring, rx_pslot);
822 kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
823 }
824
825 if (tx_pslot) {
826 kern_channel_advance_slot(tx_ring, tx_pslot);
827 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
828 (void)kern_channel_reclaim(tx_ring);
829 }
830
831 if (pcb->ipsec_output_disabled) {
832 errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
833 if (error != 0) {
834 printf("ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
835 } else {
836 pcb->ipsec_output_disabled = false;
837 }
838 }
839
840 // Unlock first, then exit ring
841 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
842
843 if (tx_pslot != NULL) {
844 kern_channel_notify(tx_ring, 0);
845 }
846 kr_exit(tx_ring);
847
848 return 0;
849 }
850
851 static errno_t
852 ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
853 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
854 void **ring_ctx)
855 {
856 #pragma unused(nxprov)
857 #pragma unused(channel)
858 #pragma unused(ring_ctx)
859 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
860 if (!is_tx_ring) {
861 VERIFY(pcb->ipsec_netif_rxring == NULL);
862 pcb->ipsec_netif_rxring = ring;
863 } else {
864 VERIFY(pcb->ipsec_netif_txring == NULL);
865 pcb->ipsec_netif_txring = ring;
866 }
867 return 0;
868 }
869
870 static void
871 ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
872 kern_channel_ring_t ring)
873 {
874 #pragma unused(nxprov)
875 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
876 if (pcb->ipsec_netif_rxring == ring) {
877 pcb->ipsec_netif_rxring = NULL;
878 } else if (pcb->ipsec_netif_txring == ring) {
879 pcb->ipsec_netif_txring = NULL;
880 }
881 }
882
883 static bool
884 ipsec_netif_check_policy(mbuf_t data)
885 {
886 necp_kernel_policy_result necp_result = 0;
887 necp_kernel_policy_result_parameter necp_result_parameter = {};
888 uint32_t necp_matched_policy_id = 0;
889
890 // This packet has been marked with IP level policy, do not mark again.
891 if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
892 return (true);
893 }
894
895 size_t length = mbuf_pkthdr_len(data);
896 if (length < sizeof(struct ip)) {
897 return (false);
898 }
899
900 struct ip *ip = mtod(data, struct ip *);
901 u_int ip_version = ip->ip_v;
902 switch (ip_version) {
903 case 4: {
904 necp_matched_policy_id = necp_ip_output_find_policy_match(data, 0, NULL,
905 &necp_result, &necp_result_parameter);
906 break;
907 }
908 case 6: {
909 necp_matched_policy_id = necp_ip6_output_find_policy_match(data, 0, NULL,
910 &necp_result, &necp_result_parameter);
911 break;
912 }
913 default: {
914 return (false);
915 }
916 }
917
918 if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
919 necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
920 /* Drop and flow divert packets should be blocked at the IP layer */
921 return (false);
922 }
923
924 necp_mark_packet_from_ip(data, necp_matched_policy_id);
925 return (true);
926 }
927
928 static errno_t
929 ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
930 kern_channel_ring_t tx_ring, uint32_t flags)
931 {
932 #pragma unused(nxprov)
933 #pragma unused(flags)
934 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
935
936 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
937
938 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
939
940 struct kern_channel_ring_stat_increment tx_ring_stats;
941 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
942 kern_channel_slot_t tx_pslot = NULL;
943 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
944
945 STATS_INC(nifs, NETIF_STATS_TXSYNC);
946
947 if (tx_slot == NULL) {
948 // Nothing to write, don't bother signalling
949 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
950 return 0;
951 }
952
953 if (pcb->ipsec_kpipe_enabled) {
954 kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring;
955 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
956
957 // Signal the kernel pipe ring to read
958 if (rx_ring != NULL) {
959 kern_channel_notify(rx_ring, 0);
960 }
961 return 0;
962 }
963
964 // If we're here, we're injecting into the BSD stack
965 while (tx_slot != NULL) {
966 size_t length = 0;
967 mbuf_t data = NULL;
968
969 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
970
971 // Advance TX ring
972 tx_pslot = tx_slot;
973 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
974
975 if (tx_ph == 0) {
976 continue;
977 }
978
979 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
980 VERIFY(tx_buf != NULL);
981 uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
982 VERIFY(tx_baddr != 0);
983 tx_baddr += kern_buflet_get_data_offset(tx_buf);
984
985 bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
986
987 length = MIN(kern_packet_get_data_length(tx_ph),
988 IPSEC_IF_DEFAULT_SLOT_SIZE);
989
990 if (length > 0) {
991 errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
992 if (error == 0) {
993 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
994 if (error == 0) {
995 // Mark packet from policy
996 uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
997 necp_mark_packet_from_ip(data, policy_id);
998
999 // Check policy with NECP
1000 if (!ipsec_netif_check_policy(data)) {
1001 printf("ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
1002 STATS_INC(nifs, NETIF_STATS_DROPPED);
1003 mbuf_freem(data);
1004 data = NULL;
1005 } else {
1006 // Send through encryption
1007 error = ipsec_output(pcb->ipsec_ifp, data);
1008 if (error != 0) {
1009 printf("ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
1010 }
1011 }
1012 } else {
1013 printf("ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
1014 STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
1015 STATS_INC(nifs, NETIF_STATS_DROPPED);
1016 mbuf_freem(data);
1017 data = NULL;
1018 }
1019 } else {
1020 printf("ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
1021 STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
1022 STATS_INC(nifs, NETIF_STATS_DROPPED);
1023 }
1024 } else {
1025 printf("ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
1026 STATS_INC(nifs, NETIF_STATS_BADLEN);
1027 STATS_INC(nifs, NETIF_STATS_DROPPED);
1028 }
1029
1030 if (data == NULL) {
1031 printf("ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
1032 break;
1033 }
1034
1035 STATS_INC(nifs, NETIF_STATS_TXPKTS);
1036 STATS_INC(nifs, NETIF_STATS_TXCOPY_MBUF);
1037
1038 tx_ring_stats.kcrsi_slots_transferred++;
1039 tx_ring_stats.kcrsi_bytes_transferred += length;
1040 }
1041
1042 if (tx_pslot) {
1043 kern_channel_advance_slot(tx_ring, tx_pslot);
1044 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1045 (void)kern_channel_reclaim(tx_ring);
1046 }
1047
1048 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1049
1050 return 0;
1051 }
1052
1053 static errno_t
1054 ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1055 kern_channel_ring_t ring, __unused uint32_t flags)
1056 {
1057 #pragma unused(nxprov)
1058 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1059
1060 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1061
1062 boolean_t more = false;
1063 errno_t rc = 0;
1064 do {
1065 rc = kern_channel_tx_refill(ring, UINT32_MAX, UINT32_MAX, true, &more);
1066 if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
1067 printf("%s, tx refill failed %d\n", __func__, rc);
1068 }
1069 } while ((rc == 0) && more);
1070
1071 if (pcb->ipsec_kpipe_enabled && !pcb->ipsec_output_disabled) {
1072 uint32_t tx_available = kern_channel_available_slot_count(ring);
1073 if (pcb->ipsec_netif_txring_size > 0 &&
1074 tx_available >= pcb->ipsec_netif_txring_size - 1) {
1075 // No room left in tx ring, disable output for now
1076 errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
1077 if (error != 0) {
1078 printf("ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
1079 } else {
1080 pcb->ipsec_output_disabled = true;
1081 }
1082 }
1083 }
1084
1085 if (pcb->ipsec_kpipe_enabled &&
1086 (((rc != 0) && (rc != EAGAIN)) || pcb->ipsec_output_disabled)) {
1087 kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring;
1088
1089 // Unlock while calling notify
1090 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1091 // Signal the kernel pipe ring to read
1092 if (rx_ring != NULL) {
1093 kern_channel_notify(rx_ring, 0);
1094 }
1095 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1096 } else {
1097 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1098 }
1099
1100 return (0);
1101 }
1102
1103 static errno_t
1104 ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1105 kern_channel_ring_t rx_ring, uint32_t flags)
1106 {
1107 #pragma unused(nxprov)
1108 #pragma unused(flags)
1109 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1110 struct kern_channel_ring_stat_increment rx_ring_stats;
1111
1112 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1113
1114 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1115
1116 // Reclaim user-released slots
1117 (void) kern_channel_reclaim(rx_ring);
1118
1119 STATS_INC(nifs, NETIF_STATS_RXSYNC);
1120
1121 uint32_t avail = kern_channel_available_slot_count(rx_ring);
1122 if (avail == 0) {
1123 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1124 return 0;
1125 }
1126
1127 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
1128 VERIFY(rx_pp != NULL);
1129 bzero(&rx_ring_stats, sizeof(rx_ring_stats));
1130 kern_channel_slot_t rx_pslot = NULL;
1131 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1132
1133 while (rx_slot != NULL) {
1134 // Check for a waiting packet
1135 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1136 mbuf_t data = pcb->ipsec_input_chain;
1137 if (data == NULL) {
1138 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1139 break;
1140 }
1141
1142 // Allocate rx packet
1143 kern_packet_t rx_ph = 0;
1144 errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1145 if (unlikely(error != 0)) {
1146 STATS_INC(nifs, NETIF_STATS_NOMEM_PKT);
1147 STATS_INC(nifs, NETIF_STATS_DROPPED);
1148 printf("ipsec_netif_sync_rx %s: failed to allocate packet\n",
1149 pcb->ipsec_ifp->if_xname);
1150 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1151 break;
1152 }
1153
1154 // Advance waiting packets
1155 pcb->ipsec_input_chain = data->m_nextpkt;
1156 data->m_nextpkt = NULL;
1157 if (pcb->ipsec_input_chain == NULL) {
1158 pcb->ipsec_input_chain_last = NULL;
1159 }
1160 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1161
1162 size_t length = mbuf_pkthdr_len(data);
1163
1164 if (length < sizeof(struct ip)) {
1165 // Flush data
1166 mbuf_freem(data);
1167 kern_pbufpool_free(rx_pp, rx_ph);
1168 STATS_INC(nifs, NETIF_STATS_BADLEN);
1169 STATS_INC(nifs, NETIF_STATS_DROPPED);
1170 printf("ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
1171 pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
1172 continue;
1173 }
1174
1175 uint32_t af = 0;
1176 struct ip *ip = mtod(data, struct ip *);
1177 u_int ip_version = ip->ip_v;
1178 switch (ip_version) {
1179 case 4: {
1180 af = AF_INET;
1181 break;
1182 }
1183 case 6: {
1184 af = AF_INET6;
1185 break;
1186 }
1187 default: {
1188 printf("ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
1189 pcb->ipsec_ifp->if_xname, ip_version);
1190 break;
1191 }
1192 }
1193
1194 if (length > rx_pp->pp_buflet_size ||
1195 (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
1196
1197 // We need to fragment to send up into the netif
1198
1199 u_int32_t fragment_mtu = rx_pp->pp_buflet_size;
1200 if (pcb->ipsec_frag_size_set &&
1201 pcb->ipsec_input_frag_size < rx_pp->pp_buflet_size) {
1202 fragment_mtu = pcb->ipsec_input_frag_size;
1203 }
1204
1205 mbuf_t fragment_chain = NULL;
1206 switch (af) {
1207 case AF_INET: {
1208 // ip_fragment expects the length in host order
1209 ip->ip_len = ntohs(ip->ip_len);
1210
1211 // ip_fragment will modify the original data, don't free
1212 int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
1213 if (fragment_error == 0 && data != NULL) {
1214 fragment_chain = data;
1215 } else {
1216 STATS_INC(nifs, NETIF_STATS_BADLEN);
1217 STATS_INC(nifs, NETIF_STATS_DROPPED);
1218 printf("ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
1219 pcb->ipsec_ifp->if_xname, length, fragment_error);
1220 }
1221 break;
1222 }
1223 case AF_INET6: {
1224 if (length < sizeof(struct ip6_hdr)) {
1225 mbuf_freem(data);
1226 STATS_INC(nifs, NETIF_STATS_BADLEN);
1227 STATS_INC(nifs, NETIF_STATS_DROPPED);
1228 printf("ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
1229 pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
1230 } else {
1231
1232 // ip6_do_fragmentation will free the original data on success only
1233 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1234 struct ip6_exthdrs exthdrs;
1235 memset(&exthdrs, 0, sizeof(exthdrs));
1236
1237 int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
1238 ip6, &exthdrs, fragment_mtu, ip6->ip6_nxt);
1239 if (fragment_error == 0 && data != NULL) {
1240 fragment_chain = data;
1241 } else {
1242 mbuf_freem(data);
1243 STATS_INC(nifs, NETIF_STATS_BADLEN);
1244 STATS_INC(nifs, NETIF_STATS_DROPPED);
1245 printf("ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
1246 pcb->ipsec_ifp->if_xname, length, fragment_error);
1247 }
1248 }
1249 break;
1250 }
1251 default: {
1252 // Cannot fragment unknown families
1253 mbuf_freem(data);
1254 STATS_INC(nifs, NETIF_STATS_BADLEN);
1255 STATS_INC(nifs, NETIF_STATS_DROPPED);
1256 printf("ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
1257 pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
1258 break;
1259 }
1260 }
1261
1262 if (fragment_chain != NULL) {
1263 // Add fragments to chain before continuing
1264 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1265 if (pcb->ipsec_input_chain != NULL) {
1266 pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
1267 } else {
1268 pcb->ipsec_input_chain = fragment_chain;
1269 }
1270 while (fragment_chain->m_nextpkt) {
1271 VERIFY(fragment_chain != fragment_chain->m_nextpkt);
1272 fragment_chain = fragment_chain->m_nextpkt;
1273 }
1274 pcb->ipsec_input_chain_last = fragment_chain;
1275 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1276 }
1277
1278 // Make sure to free unused rx packet
1279 kern_pbufpool_free(rx_pp, rx_ph);
1280
1281 continue;
1282 }
1283
1284 mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1285
1286 // Fillout rx packet
1287 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1288 VERIFY(rx_buf != NULL);
1289 void *rx_baddr = kern_buflet_get_object_address(rx_buf);
1290 VERIFY(rx_baddr != NULL);
1291
1292 // Copy-in data from mbuf to buflet
1293 mbuf_copydata(data, 0, length, (void *)rx_baddr);
1294 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
1295
1296 // Finalize and attach the packet
1297 error = kern_buflet_set_data_offset(rx_buf, 0);
1298 VERIFY(error == 0);
1299 error = kern_buflet_set_data_length(rx_buf, length);
1300 VERIFY(error == 0);
1301 error = kern_packet_set_link_header_offset(rx_ph, 0);
1302 VERIFY(error == 0);
1303 error = kern_packet_set_network_header_offset(rx_ph, 0);
1304 VERIFY(error == 0);
1305 error = kern_packet_finalize(rx_ph);
1306 VERIFY(error == 0);
1307 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1308 VERIFY(error == 0);
1309
1310 STATS_INC(nifs, NETIF_STATS_RXPKTS);
1311 STATS_INC(nifs, NETIF_STATS_RXCOPY_MBUF);
1312 bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1313
1314 rx_ring_stats.kcrsi_slots_transferred++;
1315 rx_ring_stats.kcrsi_bytes_transferred += length;
1316
1317 if (!pcb->ipsec_ext_ifdata_stats) {
1318 ifnet_stat_increment_in(pcb->ipsec_ifp, 1, length, 0);
1319 }
1320
1321 mbuf_freem(data);
1322
1323 // Advance ring
1324 rx_pslot = rx_slot;
1325 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1326 }
1327
1328 struct kern_channel_ring_stat_increment tx_ring_stats;
1329 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
1330 kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring;
1331 kern_channel_slot_t tx_pslot = NULL;
1332 kern_channel_slot_t tx_slot = NULL;
1333 if (tx_ring == NULL) {
1334 // Net-If TX ring not set up yet, nothing to read
1335 goto done;
1336 }
1337
1338
1339 // Unlock ipsec before entering ring
1340 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1341
1342 (void)kr_enter(tx_ring, TRUE);
1343
1344 // Lock again after entering and validate
1345 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1346
1347 if (tx_ring != pcb->ipsec_kpipe_txring) {
1348 goto done;
1349 }
1350
1351 tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
1352 if (tx_slot == NULL) {
1353 // Nothing to read, don't bother signalling
1354 goto done;
1355 }
1356
1357 while (rx_slot != NULL && tx_slot != NULL) {
1358 size_t length = 0;
1359 mbuf_t data = NULL;
1360 errno_t error = 0;
1361 uint32_t af;
1362
1363 // Allocate rx packet
1364 kern_packet_t rx_ph = 0;
1365 error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1366 if (unlikely(error != 0)) {
1367 STATS_INC(nifs, NETIF_STATS_NOMEM_PKT);
1368 STATS_INC(nifs, NETIF_STATS_DROPPED);
1369 printf("ipsec_netif_sync_rx %s: failed to allocate packet\n",
1370 pcb->ipsec_ifp->if_xname);
1371 break;
1372 }
1373
1374 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
1375
1376 // Advance TX ring
1377 tx_pslot = tx_slot;
1378 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1379
1380 if (tx_ph == 0) {
1381 continue;
1382 }
1383
1384 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1385 VERIFY(tx_buf != NULL);
1386 uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
1387 VERIFY(tx_baddr != 0);
1388 tx_baddr += kern_buflet_get_data_offset(tx_buf);
1389
1390 length = MIN(kern_packet_get_data_length(tx_ph),
1391 IPSEC_IF_DEFAULT_SLOT_SIZE);
1392
1393 // Increment TX stats
1394 tx_ring_stats.kcrsi_slots_transferred++;
1395 tx_ring_stats.kcrsi_bytes_transferred += length;
1396
1397 if (length >= sizeof(struct ip)) {
1398 error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
1399 if (error == 0) {
1400 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
1401 if (error == 0) {
1402 struct ip *ip = mtod(data, struct ip *);
1403 u_int ip_version = ip->ip_v;
1404 switch (ip_version) {
1405 case 4: {
1406 af = AF_INET;
1407 ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip);
1408 ip->ip_off = ntohs(ip->ip_off);
1409
1410 if (length < ip->ip_len) {
1411 printf("ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
1412 pcb->ipsec_ifp->if_xname, length, ip->ip_len);
1413 STATS_INC(nifs, NETIF_STATS_BADLEN);
1414 STATS_INC(nifs, NETIF_STATS_DROPPED);
1415 mbuf_freem(data);
1416 data = NULL;
1417 } else {
1418 data = esp4_input_extended(data, sizeof(struct ip), pcb->ipsec_ifp);
1419 }
1420 break;
1421 }
1422 case 6: {
1423 if (length < sizeof(struct ip6_hdr)) {
1424 printf("ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
1425 pcb->ipsec_ifp->if_xname, length);
1426 STATS_INC(nifs, NETIF_STATS_BADLEN);
1427 STATS_INC(nifs, NETIF_STATS_DROPPED);
1428 mbuf_freem(data);
1429 data = NULL;
1430 } else {
1431 af = AF_INET6;
1432 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1433 const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
1434 if (length < ip6_len) {
1435 printf("ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
1436 pcb->ipsec_ifp->if_xname, length, ip6_len);
1437 STATS_INC(nifs, NETIF_STATS_BADLEN);
1438 STATS_INC(nifs, NETIF_STATS_DROPPED);
1439 mbuf_freem(data);
1440 data = NULL;
1441 } else {
1442 int offset = sizeof(struct ip6_hdr);
1443 esp6_input_extended(&data, &offset, ip6->ip6_nxt, pcb->ipsec_ifp);
1444 }
1445 }
1446 break;
1447 }
1448 default: {
1449 printf("ipsec_netif_sync_rx %s: unknown ip version %u\n",
1450 pcb->ipsec_ifp->if_xname, ip_version);
1451 STATS_INC(nifs, NETIF_STATS_DROPPED);
1452 mbuf_freem(data);
1453 data = NULL;
1454 break;
1455 }
1456 }
1457 } else {
1458 printf("ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
1459 STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
1460 STATS_INC(nifs, NETIF_STATS_DROPPED);
1461 mbuf_freem(data);
1462 data = NULL;
1463 }
1464 } else {
1465 printf("ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
1466 STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
1467 STATS_INC(nifs, NETIF_STATS_DROPPED);
1468 }
1469 } else {
1470 printf("ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb->ipsec_ifp->if_xname, length);
1471 STATS_INC(nifs, NETIF_STATS_BADLEN);
1472 STATS_INC(nifs, NETIF_STATS_DROPPED);
1473 }
1474
1475 if (data == NULL) {
1476 // Failed to get decrypted data data
1477 kern_pbufpool_free(rx_pp, rx_ph);
1478 continue;
1479 }
1480
1481 length = mbuf_pkthdr_len(data);
1482 if (length > rx_pp->pp_buflet_size) {
1483 // Flush data
1484 mbuf_freem(data);
1485 kern_pbufpool_free(rx_pp, rx_ph);
1486 STATS_INC(nifs, NETIF_STATS_BADLEN);
1487 STATS_INC(nifs, NETIF_STATS_DROPPED);
1488 printf("ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
1489 pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
1490 continue;
1491 }
1492
1493 mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1494
1495 // Fillout rx packet
1496 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1497 VERIFY(rx_buf != NULL);
1498 void *rx_baddr = kern_buflet_get_object_address(rx_buf);
1499 VERIFY(rx_baddr != NULL);
1500
1501 // Copy-in data from mbuf to buflet
1502 mbuf_copydata(data, 0, length, (void *)rx_baddr);
1503 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
1504
1505 // Finalize and attach the packet
1506 error = kern_buflet_set_data_offset(rx_buf, 0);
1507 VERIFY(error == 0);
1508 error = kern_buflet_set_data_length(rx_buf, length);
1509 VERIFY(error == 0);
1510 error = kern_packet_set_link_header_offset(rx_ph, 0);
1511 VERIFY(error == 0);
1512 error = kern_packet_set_network_header_offset(rx_ph, 0);
1513 VERIFY(error == 0);
1514 error = kern_packet_finalize(rx_ph);
1515 VERIFY(error == 0);
1516 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1517 VERIFY(error == 0);
1518
1519 STATS_INC(nifs, NETIF_STATS_RXPKTS);
1520 STATS_INC(nifs, NETIF_STATS_RXCOPY_DIRECT);
1521 bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1522
1523 rx_ring_stats.kcrsi_slots_transferred++;
1524 rx_ring_stats.kcrsi_bytes_transferred += length;
1525
1526 if (!pcb->ipsec_ext_ifdata_stats) {
1527 ifnet_stat_increment_in(pcb->ipsec_ifp, 1, length, 0);
1528 }
1529
1530 mbuf_freem(data);
1531
1532 rx_pslot = rx_slot;
1533 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1534 }
1535
1536 done:
1537 if (rx_pslot) {
1538 kern_channel_advance_slot(rx_ring, rx_pslot);
1539 kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
1540 }
1541
1542 if (tx_pslot) {
1543 kern_channel_advance_slot(tx_ring, tx_pslot);
1544 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1545 (void)kern_channel_reclaim(tx_ring);
1546 }
1547
1548 // Unlock first, then exit ring
1549 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1550 if (tx_ring != NULL) {
1551 if (tx_pslot != NULL) {
1552 kern_channel_notify(tx_ring, 0);
1553 }
1554 kr_exit(tx_ring);
1555 }
1556
1557 return 0;
1558 }
1559
1560 static errno_t
1561 ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
1562 struct ifnet_init_eparams *init_params,
1563 struct ifnet **ifp)
1564 {
1565 errno_t err;
1566 nexus_controller_t controller = kern_nexus_shared_controller();
1567 struct kern_nexus_net_init net_init;
1568
1569 nexus_name_t provider_name;
1570 snprintf((char *)provider_name, sizeof(provider_name),
1571 "com.apple.netif.ipsec%d", pcb->ipsec_unit);
1572
1573 struct kern_nexus_provider_init prov_init = {
1574 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1575 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1576 .nxpi_pre_connect = ipsec_nexus_pre_connect,
1577 .nxpi_connected = ipsec_nexus_connected,
1578 .nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
1579 .nxpi_disconnected = ipsec_nexus_disconnected,
1580 .nxpi_ring_init = ipsec_netif_ring_init,
1581 .nxpi_ring_fini = ipsec_netif_ring_fini,
1582 .nxpi_slot_init = NULL,
1583 .nxpi_slot_fini = NULL,
1584 .nxpi_sync_tx = ipsec_netif_sync_tx,
1585 .nxpi_sync_rx = ipsec_netif_sync_rx,
1586 .nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
1587 };
1588
1589 nexus_attr_t nxa = NULL;
1590 err = kern_nexus_attr_create(&nxa);
1591 IPSEC_IF_VERIFY(err == 0);
1592 if (err != 0) {
1593 printf("%s: kern_nexus_attr_create failed: %d\n",
1594 __func__, err);
1595 goto failed;
1596 }
1597
1598 uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
1599 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1600 VERIFY(err == 0);
1601
1602 // Reset ring size for netif nexus to limit memory usage
1603 uint64_t ring_size = if_ipsec_ring_size;
1604 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1605 VERIFY(err == 0);
1606 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1607 VERIFY(err == 0);
1608
1609 pcb->ipsec_netif_txring_size = ring_size;
1610
1611 err = kern_nexus_controller_register_provider(controller,
1612 ipsec_nx_dom_prov,
1613 provider_name,
1614 &prov_init,
1615 sizeof(prov_init),
1616 nxa,
1617 &pcb->ipsec_nx.if_provider);
1618 IPSEC_IF_VERIFY(err == 0);
1619 if (err != 0) {
1620 printf("%s register provider failed, error %d\n",
1621 __func__, err);
1622 goto failed;
1623 }
1624
1625 bzero(&net_init, sizeof(net_init));
1626 net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
1627 net_init.nxneti_flags = 0;
1628 net_init.nxneti_eparams = init_params;
1629 net_init.nxneti_lladdr = NULL;
1630 net_init.nxneti_prepare = ipsec_netif_prepare;
1631 err = kern_nexus_controller_alloc_net_provider_instance(controller,
1632 pcb->ipsec_nx.if_provider,
1633 pcb,
1634 &pcb->ipsec_nx.if_instance,
1635 &net_init,
1636 ifp);
1637 IPSEC_IF_VERIFY(err == 0);
1638 if (err != 0) {
1639 printf("%s alloc_net_provider_instance failed, %d\n",
1640 __func__, err);
1641 kern_nexus_controller_deregister_provider(controller,
1642 pcb->ipsec_nx.if_provider);
1643 uuid_clear(pcb->ipsec_nx.if_provider);
1644 goto failed;
1645 }
1646
1647 failed:
1648 if (nxa) {
1649 kern_nexus_attr_destroy(nxa);
1650 }
1651 return (err);
1652 }
1653
1654 static void
1655 ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
1656 {
1657 nexus_controller_t controller = kern_nexus_shared_controller();
1658 errno_t err;
1659
1660 if (!uuid_is_null(instance)) {
1661 err = kern_nexus_controller_free_provider_instance(controller,
1662 instance);
1663 if (err != 0) {
1664 printf("%s free_provider_instance failed %d\n",
1665 __func__, err);
1666 }
1667 uuid_clear(instance);
1668 }
1669 if (!uuid_is_null(provider)) {
1670 err = kern_nexus_controller_deregister_provider(controller,
1671 provider);
1672 if (err != 0) {
1673 printf("%s deregister_provider %d\n", __func__, err);
1674 }
1675 uuid_clear(provider);
1676 }
1677 return;
1678 }
1679
1680 static void
1681 ipsec_nexus_detach(ipsec_nx_t nx)
1682 {
1683 nexus_controller_t controller = kern_nexus_shared_controller();
1684 errno_t err;
1685
1686 if (!uuid_is_null(nx->ms_host)) {
1687 err = kern_nexus_ifdetach(controller,
1688 nx->ms_instance,
1689 nx->ms_host);
1690 if (err != 0) {
1691 printf("%s: kern_nexus_ifdetach ms host failed %d\n",
1692 __func__, err);
1693 }
1694 }
1695
1696 if (!uuid_is_null(nx->ms_device)) {
1697 err = kern_nexus_ifdetach(controller,
1698 nx->ms_instance,
1699 nx->ms_device);
1700 if (err != 0) {
1701 printf("%s: kern_nexus_ifdetach ms device failed %d\n",
1702 __func__, err);
1703 }
1704 }
1705
1706 ipsec_detach_provider_and_instance(nx->if_provider,
1707 nx->if_instance);
1708 ipsec_detach_provider_and_instance(nx->ms_provider,
1709 nx->ms_instance);
1710
1711 memset(nx, 0, sizeof(*nx));
1712 }
1713
1714 static errno_t
1715 ipsec_create_fs_provider_and_instance(uint32_t subtype, const char *type_name,
1716 const char *ifname,
1717 uuid_t *provider, uuid_t *instance)
1718 {
1719 nexus_attr_t attr = NULL;
1720 nexus_controller_t controller = kern_nexus_shared_controller();
1721 uuid_t dom_prov;
1722 errno_t err;
1723 struct kern_nexus_init init;
1724 nexus_name_t provider_name;
1725
1726 err = kern_nexus_get_builtin_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
1727 &dom_prov);
1728 IPSEC_IF_VERIFY(err == 0);
1729 if (err != 0) {
1730 printf("%s can't get %s provider, error %d\n",
1731 __func__, type_name, err);
1732 goto failed;
1733 }
1734
1735 err = kern_nexus_attr_create(&attr);
1736 IPSEC_IF_VERIFY(err == 0);
1737 if (err != 0) {
1738 printf("%s: kern_nexus_attr_create failed: %d\n",
1739 __func__, err);
1740 goto failed;
1741 }
1742
1743 err = kern_nexus_attr_set(attr, NEXUS_ATTR_EXTENSIONS, subtype);
1744 VERIFY(err == 0);
1745
1746 uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
1747 err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1748 VERIFY(err == 0);
1749
1750 // Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
1751 uint64_t tx_ring_size = if_ipsec_tx_fsw_ring_size;
1752 err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
1753 VERIFY(err == 0);
1754 uint64_t rx_ring_size = if_ipsec_rx_fsw_ring_size;
1755 err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
1756 VERIFY(err == 0);
1757
1758 snprintf((char *)provider_name, sizeof(provider_name),
1759 "com.apple.%s.%s", type_name, ifname);
1760 err = kern_nexus_controller_register_provider(controller,
1761 dom_prov,
1762 provider_name,
1763 NULL,
1764 0,
1765 attr,
1766 provider);
1767 kern_nexus_attr_destroy(attr);
1768 attr = NULL;
1769 IPSEC_IF_VERIFY(err == 0);
1770 if (err != 0) {
1771 printf("%s register %s provider failed, error %d\n",
1772 __func__, type_name, err);
1773 goto failed;
1774 }
1775 bzero(&init, sizeof (init));
1776 init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
1777 err = kern_nexus_controller_alloc_provider_instance(controller,
1778 *provider,
1779 NULL,
1780 instance, &init);
1781 IPSEC_IF_VERIFY(err == 0);
1782 if (err != 0) {
1783 printf("%s alloc_provider_instance %s failed, %d\n",
1784 __func__, type_name, err);
1785 kern_nexus_controller_deregister_provider(controller,
1786 *provider);
1787 uuid_clear(*provider);
1788 }
1789 failed:
1790 return (err);
1791 }
1792
1793 static errno_t
1794 ipsec_multistack_attach(struct ipsec_pcb *pcb)
1795 {
1796 nexus_controller_t controller = kern_nexus_shared_controller();
1797 errno_t err = 0;
1798 ipsec_nx_t nx = &pcb->ipsec_nx;
1799
1800 // Allocate multistack flowswitch
1801 err = ipsec_create_fs_provider_and_instance(NEXUS_EXTENSION_FSW_TYPE_MULTISTACK,
1802 "multistack",
1803 pcb->ipsec_ifp->if_xname,
1804 &nx->ms_provider,
1805 &nx->ms_instance);
1806 if (err != 0) {
1807 printf("%s: failed to create bridge provider and instance\n",
1808 __func__);
1809 goto failed;
1810 }
1811
1812 // Attach multistack to device port
1813 err = kern_nexus_ifattach(controller, nx->ms_instance,
1814 NULL, nx->if_instance,
1815 FALSE, &nx->ms_device);
1816 if (err != 0) {
1817 printf("%s kern_nexus_ifattach ms device %d\n", __func__, err);
1818 goto failed;
1819 }
1820
1821 // Attach multistack to host port
1822 err = kern_nexus_ifattach(controller, nx->ms_instance,
1823 NULL, nx->if_instance,
1824 TRUE, &nx->ms_host);
1825 if (err != 0) {
1826 printf("%s kern_nexus_ifattach ms host %d\n", __func__, err);
1827 goto failed;
1828 }
1829
1830 // Extract the agent UUID and save for later
1831 struct kern_nexus *multistack_nx = nx_find(nx->ms_instance, false);
1832 if (multistack_nx != NULL) {
1833 struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(multistack_nx);
1834 if (flowswitch != NULL) {
1835 FSW_RLOCK(flowswitch);
1836 struct fsw_ms_context *ms_context = (struct fsw_ms_context *)flowswitch->fsw_ops_private;
1837 if (ms_context != NULL) {
1838 uuid_copy(nx->ms_agent, ms_context->mc_agent_uuid);
1839 } else {
1840 printf("ipsec_multistack_attach - fsw_ms_context is NULL\n");
1841 }
1842 FSW_UNLOCK(flowswitch);
1843 } else {
1844 printf("ipsec_multistack_attach - flowswitch is NULL\n");
1845 }
1846 nx_release(multistack_nx);
1847 } else {
1848 printf("ipsec_multistack_attach - unable to find multistack nexus\n");
1849 }
1850
1851 return (0);
1852
1853 failed:
1854 ipsec_nexus_detach(nx);
1855
1856 errno_t detach_error = 0;
1857 if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
1858 panic("ipsec_multistack_attach - ifnet_detach failed: %d\n", detach_error);
1859 /* NOT REACHED */
1860 }
1861
1862 return (err);
1863 }
1864
1865 #pragma mark Kernel Pipe Nexus
1866
1867 static errno_t
1868 ipsec_register_kernel_pipe_nexus(void)
1869 {
1870 nexus_attr_t nxa = NULL;
1871 errno_t result;
1872
1873 lck_mtx_lock(&ipsec_lock);
1874 if (ipsec_ncd_refcount++) {
1875 lck_mtx_unlock(&ipsec_lock);
1876 return 0;
1877 }
1878
1879 result = kern_nexus_controller_create(&ipsec_ncd);
1880 if (result) {
1881 printf("%s: kern_nexus_controller_create failed: %d\n",
1882 __FUNCTION__, result);
1883 goto done;
1884 }
1885
1886 uuid_t dom_prov;
1887 result = kern_nexus_get_builtin_domain_provider(
1888 NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
1889 if (result) {
1890 printf("%s: kern_nexus_get_builtin_domain_provider failed: %d\n",
1891 __FUNCTION__, result);
1892 goto done;
1893 }
1894
1895 struct kern_nexus_provider_init prov_init = {
1896 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1897 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1898 .nxpi_pre_connect = ipsec_nexus_pre_connect,
1899 .nxpi_connected = ipsec_nexus_connected,
1900 .nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
1901 .nxpi_disconnected = ipsec_nexus_disconnected,
1902 .nxpi_ring_init = ipsec_kpipe_ring_init,
1903 .nxpi_ring_fini = ipsec_kpipe_ring_fini,
1904 .nxpi_slot_init = NULL,
1905 .nxpi_slot_fini = NULL,
1906 .nxpi_sync_tx = ipsec_kpipe_sync_tx,
1907 .nxpi_sync_rx = ipsec_kpipe_sync_rx,
1908 .nxpi_tx_doorbell = NULL,
1909 };
1910
1911 result = kern_nexus_attr_create(&nxa);
1912 if (result) {
1913 printf("%s: kern_nexus_attr_create failed: %d\n",
1914 __FUNCTION__, result);
1915 goto done;
1916 }
1917
1918 uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
1919 result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1920 VERIFY(result == 0);
1921
1922 // Reset ring size for kernel pipe nexus to limit memory usage
1923 uint64_t ring_size = if_ipsec_ring_size;
1924 result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1925 VERIFY(result == 0);
1926 result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1927 VERIFY(result == 0);
1928
1929 result = kern_nexus_controller_register_provider(ipsec_ncd,
1930 dom_prov,
1931 (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
1932 &prov_init,
1933 sizeof(prov_init),
1934 nxa,
1935 &ipsec_kpipe_uuid);
1936 if (result) {
1937 printf("%s: kern_nexus_controller_register_provider failed: %d\n",
1938 __FUNCTION__, result);
1939 goto done;
1940 }
1941
1942 done:
1943 if (nxa) {
1944 kern_nexus_attr_destroy(nxa);
1945 }
1946
1947 if (result) {
1948 if (ipsec_ncd) {
1949 kern_nexus_controller_destroy(ipsec_ncd);
1950 ipsec_ncd = NULL;
1951 }
1952 ipsec_ncd_refcount = 0;
1953 }
1954
1955 lck_mtx_unlock(&ipsec_lock);
1956
1957 return result;
1958 }
1959
1960 static void
1961 ipsec_unregister_kernel_pipe_nexus(void)
1962 {
1963 lck_mtx_lock(&ipsec_lock);
1964
1965 VERIFY(ipsec_ncd_refcount > 0);
1966
1967 if (--ipsec_ncd_refcount == 0) {
1968 kern_nexus_controller_destroy(ipsec_ncd);
1969 ipsec_ncd = NULL;
1970 }
1971
1972 lck_mtx_unlock(&ipsec_lock);
1973 }
1974
1975 // For use by socket option, not internally
1976 static errno_t
1977 ipsec_disable_channel(struct ipsec_pcb *pcb)
1978 {
1979 errno_t result;
1980 int enabled;
1981 uuid_t uuid;
1982
1983 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
1984
1985 enabled = pcb->ipsec_kpipe_enabled;
1986 uuid_copy(uuid, pcb->ipsec_kpipe_uuid);
1987
1988 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid) == !enabled);
1989
1990 pcb->ipsec_kpipe_enabled = 0;
1991 uuid_clear(pcb->ipsec_kpipe_uuid);
1992
1993 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
1994
1995 if (enabled) {
1996 result = kern_nexus_controller_free_provider_instance(ipsec_ncd, uuid);
1997 } else {
1998 result = ENXIO;
1999 }
2000
2001 if (!result) {
2002 ipsec_unregister_kernel_pipe_nexus();
2003 }
2004
2005 return result;
2006 }
2007
2008 static errno_t
2009 ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
2010 {
2011 struct kern_nexus_init init;
2012 errno_t result;
2013
2014 result = ipsec_register_kernel_pipe_nexus();
2015 if (result) {
2016 return result;
2017 }
2018
2019 VERIFY(ipsec_ncd);
2020
2021 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2022
2023 if (pcb->ipsec_kpipe_enabled) {
2024 result = EEXIST; // return success instead?
2025 goto done;
2026 }
2027
2028 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid));
2029 bzero(&init, sizeof (init));
2030 init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
2031 result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
2032 ipsec_kpipe_uuid, pcb, &pcb->ipsec_kpipe_uuid, &init);
2033 if (result) {
2034 goto done;
2035 }
2036
2037 nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
2038 result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
2039 pcb->ipsec_kpipe_uuid, &port,
2040 proc_pid(proc), NULL, NULL, 0, NEXUS_BIND_PID);
2041 if (result) {
2042 kern_nexus_controller_free_provider_instance(ipsec_ncd,
2043 pcb->ipsec_kpipe_uuid);
2044 uuid_clear(pcb->ipsec_kpipe_uuid);
2045 goto done;
2046 }
2047
2048 pcb->ipsec_kpipe_enabled = 1;
2049
2050 done:
2051 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2052
2053 if (result) {
2054 ipsec_unregister_kernel_pipe_nexus();
2055 }
2056
2057 return result;
2058 }
2059
2060 #endif // IPSEC_NEXUS
2061
2062
2063 /* Kernel control functions */
2064
2065 static inline void
2066 ipsec_free_pcb(struct ipsec_pcb *pcb)
2067 {
2068 #if IPSEC_NEXUS
2069 mbuf_freem_list(pcb->ipsec_input_chain);
2070 lck_mtx_destroy(&pcb->ipsec_input_chain_lock, ipsec_lck_grp);
2071 #endif // IPSEC_NEXUS
2072 lck_rw_destroy(&pcb->ipsec_pcb_lock, ipsec_lck_grp);
2073 lck_mtx_lock(&ipsec_lock);
2074 TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
2075 lck_mtx_unlock(&ipsec_lock);
2076 zfree(ipsec_pcb_zone, pcb);
2077 }
2078
2079 static errno_t
2080 ipsec_ctl_connect(kern_ctl_ref kctlref,
2081 struct sockaddr_ctl *sac,
2082 void **unitinfo)
2083 {
2084 struct ifnet_init_eparams ipsec_init = {};
2085 errno_t result = 0;
2086
2087 struct ipsec_pcb *pcb = zalloc(ipsec_pcb_zone);
2088 memset(pcb, 0, sizeof(*pcb));
2089
2090 /* Setup the protocol control block */
2091 *unitinfo = pcb;
2092 pcb->ipsec_ctlref = kctlref;
2093 pcb->ipsec_unit = sac->sc_unit;
2094 pcb->ipsec_output_service_class = MBUF_SC_OAM;
2095
2096 lck_mtx_lock(&ipsec_lock);
2097
2098 /* Find some open interface id */
2099 u_int32_t chosen_unique_id = 1;
2100 struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
2101 if (next_pcb != NULL) {
2102 /* List was not empty, add one to the last item */
2103 chosen_unique_id = next_pcb->ipsec_unique_id + 1;
2104 next_pcb = NULL;
2105
2106 /*
2107 * If this wrapped the id number, start looking at
2108 * the front of the list for an unused id.
2109 */
2110 if (chosen_unique_id == 0) {
2111 /* Find the next unused ID */
2112 chosen_unique_id = 1;
2113 TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
2114 if (next_pcb->ipsec_unique_id > chosen_unique_id) {
2115 /* We found a gap */
2116 break;
2117 }
2118
2119 chosen_unique_id = next_pcb->ipsec_unique_id + 1;
2120 }
2121 }
2122 }
2123
2124 pcb->ipsec_unique_id = chosen_unique_id;
2125
2126 if (next_pcb != NULL) {
2127 TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
2128 } else {
2129 TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
2130 }
2131 lck_mtx_unlock(&ipsec_lock);
2132
2133 snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
2134 snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
2135 printf("ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);
2136
2137 lck_rw_init(&pcb->ipsec_pcb_lock, ipsec_lck_grp, ipsec_lck_attr);
2138 #if IPSEC_NEXUS
2139 lck_mtx_init(&pcb->ipsec_input_chain_lock, ipsec_lck_grp, ipsec_lck_attr);
2140 #endif // IPSEC_NEXUS
2141
2142 /* Create the interface */
2143 bzero(&ipsec_init, sizeof(ipsec_init));
2144 ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
2145 ipsec_init.len = sizeof (ipsec_init);
2146
2147 #if IPSEC_NEXUS
2148 ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
2149 #else // IPSEC_NEXUS
2150 ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
2151 ipsec_init.start = ipsec_start;
2152 #endif // IPSEC_NEXUS
2153 ipsec_init.name = "ipsec";
2154 ipsec_init.unit = pcb->ipsec_unit - 1;
2155 ipsec_init.uniqueid = pcb->ipsec_unique_name;
2156 ipsec_init.uniqueid_len = strlen(pcb->ipsec_unique_name);
2157 ipsec_init.family = ipsec_family;
2158 ipsec_init.subfamily = IFNET_SUBFAMILY_IPSEC;
2159 ipsec_init.type = IFT_OTHER;
2160 ipsec_init.demux = ipsec_demux;
2161 ipsec_init.add_proto = ipsec_add_proto;
2162 ipsec_init.del_proto = ipsec_del_proto;
2163 ipsec_init.softc = pcb;
2164 ipsec_init.ioctl = ipsec_ioctl;
2165 ipsec_init.detach = ipsec_detached;
2166
2167 #if IPSEC_NEXUS
2168 result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
2169 if (result != 0) {
2170 printf("ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
2171 ipsec_free_pcb(pcb);
2172 *unitinfo = NULL;
2173 return result;
2174 }
2175
2176 result = ipsec_multistack_attach(pcb);
2177 if (result != 0) {
2178 printf("ipsec_ctl_connect - ipsec_multistack_attach failed: %d\n", result);
2179 *unitinfo = NULL;
2180 return result;
2181 }
2182
2183 #else // IPSEC_NEXUS
2184 result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
2185 if (result != 0) {
2186 printf("ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
2187 ipsec_free_pcb(pcb);
2188 *unitinfo = NULL;
2189 return result;
2190 }
2191 ipsec_ifnet_set_attrs(pcb->ipsec_ifp);
2192
2193 /* Attach the interface */
2194 result = ifnet_attach(pcb->ipsec_ifp, NULL);
2195 if (result != 0) {
2196 printf("ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
2197 ifnet_release(pcb->ipsec_ifp);
2198 ipsec_free_pcb(pcb);
2199 *unitinfo = NULL;
2200 return (result);
2201 }
2202 #endif // IPSEC_NEXUS
2203
2204 /* Attach to bpf */
2205 bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
2206
2207 /* The interfaces resoures allocated, mark it as running */
2208 ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);
2209
2210 return (0);
2211 }
2212
2213 static errno_t
2214 ipsec_detach_ip(ifnet_t interface,
2215 protocol_family_t protocol,
2216 socket_t pf_socket)
2217 {
2218 errno_t result = EPROTONOSUPPORT;
2219
2220 /* Attempt a detach */
2221 if (protocol == PF_INET) {
2222 struct ifreq ifr;
2223
2224 bzero(&ifr, sizeof(ifr));
2225 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2226 ifnet_name(interface), ifnet_unit(interface));
2227
2228 result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
2229 }
2230 else if (protocol == PF_INET6) {
2231 struct in6_ifreq ifr6;
2232
2233 bzero(&ifr6, sizeof(ifr6));
2234 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2235 ifnet_name(interface), ifnet_unit(interface));
2236
2237 result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2238 }
2239
2240 return result;
2241 }
2242
2243 static void
2244 ipsec_remove_address(ifnet_t interface,
2245 protocol_family_t protocol,
2246 ifaddr_t address,
2247 socket_t pf_socket)
2248 {
2249 errno_t result = 0;
2250
2251 /* Attempt a detach */
2252 if (protocol == PF_INET) {
2253 struct ifreq ifr;
2254
2255 bzero(&ifr, sizeof(ifr));
2256 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2257 ifnet_name(interface), ifnet_unit(interface));
2258 result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2259 if (result != 0) {
2260 printf("ipsec_remove_address - ifaddr_address failed: %d", result);
2261 }
2262 else {
2263 result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2264 if (result != 0) {
2265 printf("ipsec_remove_address - SIOCDIFADDR failed: %d", result);
2266 }
2267 }
2268 }
2269 else if (protocol == PF_INET6) {
2270 struct in6_ifreq ifr6;
2271
2272 bzero(&ifr6, sizeof(ifr6));
2273 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2274 ifnet_name(interface), ifnet_unit(interface));
2275 result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
2276 sizeof(ifr6.ifr_addr));
2277 if (result != 0) {
2278 printf("ipsec_remove_address - ifaddr_address failed (v6): %d",
2279 result);
2280 }
2281 else {
2282 result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2283 if (result != 0) {
2284 printf("ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
2285 result);
2286 }
2287 }
2288 }
2289 }
2290
2291 static void
2292 ipsec_cleanup_family(ifnet_t interface,
2293 protocol_family_t protocol)
2294 {
2295 errno_t result = 0;
2296 socket_t pf_socket = NULL;
2297 ifaddr_t *addresses = NULL;
2298 int i;
2299
2300 if (protocol != PF_INET && protocol != PF_INET6) {
2301 printf("ipsec_cleanup_family - invalid protocol family %d\n", protocol);
2302 return;
2303 }
2304
2305 /* Create a socket for removing addresses and detaching the protocol */
2306 result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2307 if (result != 0) {
2308 if (result != EAFNOSUPPORT)
2309 printf("ipsec_cleanup_family - failed to create %s socket: %d\n",
2310 protocol == PF_INET ? "IP" : "IPv6", result);
2311 goto cleanup;
2312 }
2313
2314 /* always set SS_PRIV, we want to close and detach regardless */
2315 sock_setpriv(pf_socket, 1);
2316
2317 result = ipsec_detach_ip(interface, protocol, pf_socket);
2318 if (result == 0 || result == ENXIO) {
2319 /* We are done! We either detached or weren't attached. */
2320 goto cleanup;
2321 }
2322 else if (result != EBUSY) {
2323 /* Uh, not really sure what happened here... */
2324 printf("ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2325 goto cleanup;
2326 }
2327
2328 /*
2329 * At this point, we received an EBUSY error. This means there are
2330 * addresses attached. We should detach them and then try again.
2331 */
2332 result = ifnet_get_address_list_family(interface, &addresses, protocol);
2333 if (result != 0) {
2334 printf("fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2335 ifnet_name(interface), ifnet_unit(interface),
2336 protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
2337 goto cleanup;
2338 }
2339
2340 for (i = 0; addresses[i] != 0; i++) {
2341 ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
2342 }
2343 ifnet_free_address_list(addresses);
2344 addresses = NULL;
2345
2346 /*
2347 * The addresses should be gone, we should try the remove again.
2348 */
2349 result = ipsec_detach_ip(interface, protocol, pf_socket);
2350 if (result != 0 && result != ENXIO) {
2351 printf("ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2352 }
2353
2354 cleanup:
2355 if (pf_socket != NULL)
2356 sock_close(pf_socket);
2357
2358 if (addresses != NULL)
2359 ifnet_free_address_list(addresses);
2360 }
2361
2362 static errno_t
2363 ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref,
2364 __unused u_int32_t unit,
2365 void *unitinfo)
2366 {
2367 struct ipsec_pcb *pcb = unitinfo;
2368 ifnet_t ifp = NULL;
2369 errno_t result = 0;
2370
2371 if (pcb == NULL) {
2372 return EINVAL;
2373 }
2374
2375 #if IPSEC_NEXUS
2376 // Tell the nexus to stop all rings
2377 if (pcb->ipsec_netif_nexus != NULL) {
2378 kern_nexus_stop(pcb->ipsec_netif_nexus);
2379 }
2380 #endif // IPSEC_NEXUS
2381
2382 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2383
2384 #if IPSEC_NEXUS
2385 uuid_t kpipe_uuid;
2386 uuid_copy(kpipe_uuid, pcb->ipsec_kpipe_uuid);
2387 uuid_clear(pcb->ipsec_kpipe_uuid);
2388 pcb->ipsec_kpipe_enabled = FALSE;
2389 #endif // IPSEC_NEXUS
2390
2391 ifp = pcb->ipsec_ifp;
2392 VERIFY(ifp != NULL);
2393 pcb->ipsec_ctlref = NULL;
2394
2395 /*
2396 * Quiesce the interface and flush any pending outbound packets.
2397 */
2398 if_down(ifp);
2399
2400 /* Increment refcnt, but detach interface */
2401 ifnet_incr_iorefcnt(ifp);
2402 if ((result = ifnet_detach(ifp)) != 0) {
2403 panic("ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
2404 /* NOT REACHED */
2405 }
2406
2407 /*
2408 * We want to do everything in our power to ensure that the interface
2409 * really goes away when the socket is closed. We must remove IP/IPv6
2410 * addresses and detach the protocols. Finally, we can remove and
2411 * release the interface.
2412 */
2413 key_delsp_for_ipsec_if(ifp);
2414
2415 ipsec_cleanup_family(ifp, AF_INET);
2416 ipsec_cleanup_family(ifp, AF_INET6);
2417
2418 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2419
2420 #if IPSEC_NEXUS
2421 if (!uuid_is_null(kpipe_uuid)) {
2422 if (kern_nexus_controller_free_provider_instance(ipsec_ncd, kpipe_uuid) == 0) {
2423 ipsec_unregister_kernel_pipe_nexus();
2424 }
2425 }
2426 ipsec_nexus_detach(&pcb->ipsec_nx);
2427 #endif // IPSEC_NEXUS
2428
2429 /* Decrement refcnt to finish detaching and freeing */
2430 ifnet_decr_iorefcnt(ifp);
2431
2432 return 0;
2433 }
2434
2435 static errno_t
2436 ipsec_ctl_send(__unused kern_ctl_ref kctlref,
2437 __unused u_int32_t unit,
2438 __unused void *unitinfo,
2439 mbuf_t m,
2440 __unused int flags)
2441 {
2442 /* Receive messages from the control socket. Currently unused. */
2443 mbuf_freem(m);
2444 return 0;
2445 }
2446
2447 static errno_t
2448 ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,
2449 __unused u_int32_t unit,
2450 void *unitinfo,
2451 int opt,
2452 void *data,
2453 size_t len)
2454 {
2455 struct ipsec_pcb *pcb = unitinfo;
2456 errno_t result = 0;
2457
2458 /* check for privileges for privileged options */
2459 switch (opt) {
2460 case IPSEC_OPT_FLAGS:
2461 case IPSEC_OPT_EXT_IFDATA_STATS:
2462 case IPSEC_OPT_SET_DELEGATE_INTERFACE:
2463 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
2464 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
2465 return EPERM;
2466 }
2467 break;
2468 }
2469
2470 switch (opt) {
2471 case IPSEC_OPT_FLAGS:
2472 if (len != sizeof(u_int32_t))
2473 result = EMSGSIZE;
2474 else
2475 pcb->ipsec_flags = *(u_int32_t *)data;
2476 break;
2477
2478 case IPSEC_OPT_EXT_IFDATA_STATS:
2479 if (len != sizeof(int)) {
2480 result = EMSGSIZE;
2481 break;
2482 }
2483 pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
2484 break;
2485
2486 case IPSEC_OPT_INC_IFDATA_STATS_IN:
2487 case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
2488 struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
2489
2490 if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
2491 result = EINVAL;
2492 break;
2493 }
2494 if (!pcb->ipsec_ext_ifdata_stats) {
2495 result = EINVAL;
2496 break;
2497 }
2498 if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN)
2499 ifnet_stat_increment_in(pcb->ipsec_ifp, utsp->utsp_packets,
2500 utsp->utsp_bytes, utsp->utsp_errors);
2501 else
2502 ifnet_stat_increment_out(pcb->ipsec_ifp, utsp->utsp_packets,
2503 utsp->utsp_bytes, utsp->utsp_errors);
2504 break;
2505 }
2506
2507 case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
2508 ifnet_t del_ifp = NULL;
2509 char name[IFNAMSIZ];
2510
2511 if (len > IFNAMSIZ - 1) {
2512 result = EMSGSIZE;
2513 break;
2514 }
2515 if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */
2516 bcopy(data, name, len);
2517 name[len] = 0;
2518 result = ifnet_find_by_name(name, &del_ifp);
2519 }
2520 if (result == 0) {
2521 printf("%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
2522 __func__, pcb->ipsec_ifp->if_xname,
2523 del_ifp->if_xname);
2524
2525 result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
2526 if (del_ifp)
2527 ifnet_release(del_ifp);
2528 }
2529 break;
2530 }
2531
2532 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
2533 if (len != sizeof(int)) {
2534 result = EMSGSIZE;
2535 break;
2536 }
2537 mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
2538 if (output_service_class == MBUF_SC_UNSPEC) {
2539 pcb->ipsec_output_service_class = MBUF_SC_OAM;
2540 } else {
2541 pcb->ipsec_output_service_class = output_service_class;
2542 }
2543 printf("%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
2544 __func__, pcb->ipsec_ifp->if_xname,
2545 pcb->ipsec_output_service_class);
2546 break;
2547 }
2548
2549 #if IPSEC_NEXUS
2550 case IPSEC_OPT_ENABLE_CHANNEL: {
2551 if (len != sizeof(int)) {
2552 result = EMSGSIZE;
2553 break;
2554 }
2555 if (*(int *)data) {
2556 result = ipsec_enable_channel(pcb, current_proc());
2557 } else {
2558 result = ipsec_disable_channel(pcb);
2559 }
2560 break;
2561 }
2562
2563 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
2564 if (len != sizeof(int)) {
2565 result = EMSGSIZE;
2566 break;
2567 }
2568 if (!if_enable_netagent) {
2569 result = ENOTSUP;
2570 break;
2571 }
2572 if (*(int *)data) {
2573 if (!uuid_is_null(pcb->ipsec_nx.ms_agent)) {
2574 if_add_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.ms_agent);
2575 }
2576 } else {
2577 if (!uuid_is_null(pcb->ipsec_nx.ms_agent)) {
2578 if_delete_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.ms_agent);
2579 }
2580 }
2581 break;
2582 }
2583
2584 case IPSEC_OPT_INPUT_FRAG_SIZE: {
2585 if (len != sizeof(u_int32_t)) {
2586 result = EMSGSIZE;
2587 break;
2588 }
2589 u_int32_t input_frag_size = *(u_int32_t *)data;
2590 if (input_frag_size <= sizeof(struct ip6_hdr)) {
2591 pcb->ipsec_frag_size_set = FALSE;
2592 pcb->ipsec_input_frag_size = 0;
2593 } else {
2594 printf("SET FRAG SIZE TO %u\n", input_frag_size);
2595 pcb->ipsec_frag_size_set = TRUE;
2596 pcb->ipsec_input_frag_size = input_frag_size;
2597 }
2598 break;
2599 }
2600 #endif // IPSEC_NEXUS
2601
2602 default:
2603 result = ENOPROTOOPT;
2604 break;
2605 }
2606
2607 return result;
2608 }
2609
2610 static errno_t
2611 ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
2612 __unused u_int32_t unit,
2613 void *unitinfo,
2614 int opt,
2615 void *data,
2616 size_t *len)
2617 {
2618 struct ipsec_pcb *pcb = unitinfo;
2619 errno_t result = 0;
2620
2621 switch (opt) {
2622 case IPSEC_OPT_FLAGS: {
2623 if (*len != sizeof(u_int32_t)) {
2624 result = EMSGSIZE;
2625 } else {
2626 *(u_int32_t *)data = pcb->ipsec_flags;
2627 }
2628 break;
2629 }
2630
2631 case IPSEC_OPT_EXT_IFDATA_STATS: {
2632 if (*len != sizeof(int)) {
2633 result = EMSGSIZE;
2634 } else {
2635 *(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
2636 }
2637 break;
2638 }
2639
2640 case IPSEC_OPT_IFNAME: {
2641 if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) {
2642 result = EMSGSIZE;
2643 } else {
2644 *len = snprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
2645 }
2646 break;
2647 }
2648
2649 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
2650 if (*len != sizeof(int)) {
2651 result = EMSGSIZE;
2652 } else {
2653 *(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
2654 }
2655 break;
2656 }
2657
2658 #if IPSEC_NEXUS
2659 case IPSEC_OPT_GET_CHANNEL_UUID: {
2660 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
2661 if (uuid_is_null(pcb->ipsec_kpipe_uuid)) {
2662 result = ENXIO;
2663 } else if (*len != sizeof(uuid_t)) {
2664 result = EMSGSIZE;
2665 } else {
2666 uuid_copy(data, pcb->ipsec_kpipe_uuid);
2667 }
2668 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
2669 break;
2670 }
2671
2672 case IPSEC_OPT_INPUT_FRAG_SIZE: {
2673 if (*len != sizeof(u_int32_t)) {
2674 result = EMSGSIZE;
2675 } else {
2676 *(u_int32_t *)data = pcb->ipsec_input_frag_size;
2677 }
2678 break;
2679 }
2680 #endif // IPSEC_NEXUS
2681
2682 default: {
2683 result = ENOPROTOOPT;
2684 break;
2685 }
2686 }
2687
2688 return result;
2689 }
2690
2691 /* Network Interface functions */
2692 static errno_t
2693 ipsec_output(ifnet_t interface,
2694 mbuf_t data)
2695 {
2696 struct ipsec_pcb *pcb = ifnet_softc(interface);
2697 struct ipsec_output_state ipsec_state;
2698 struct route ro;
2699 struct route_in6 ro6;
2700 int length;
2701 struct ip *ip;
2702 struct ip6_hdr *ip6;
2703 struct ip_out_args ipoa;
2704 struct ip6_out_args ip6oa;
2705 int error = 0;
2706 u_int ip_version = 0;
2707 int flags = 0;
2708 struct flowadv *adv = NULL;
2709
2710 // Make sure this packet isn't looping through the interface
2711 if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
2712 error = EINVAL;
2713 goto ipsec_output_err;
2714 }
2715
2716 // Mark the interface so NECP can evaluate tunnel policy
2717 necp_mark_packet_from_interface(data, interface);
2718
2719 ip = mtod(data, struct ip *);
2720 ip_version = ip->ip_v;
2721
2722 switch (ip_version) {
2723 case 4: {
2724 /* Apply encryption */
2725 memset(&ipsec_state, 0, sizeof(ipsec_state));
2726 ipsec_state.m = data;
2727 ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
2728 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
2729
2730 error = ipsec4_interface_output(&ipsec_state, interface);
2731 /* Tunneled in IPv6 - packet is gone */
2732 if (error == 0 && ipsec_state.tunneled == 6) {
2733 goto done;
2734 }
2735
2736 data = ipsec_state.m;
2737 if (error || data == NULL) {
2738 if (error) {
2739 printf("ipsec_output: ipsec4_output error %d.\n", error);
2740 }
2741 goto ipsec_output_err;
2742 }
2743
2744 /* Set traffic class, set flow */
2745 m_set_service_class(data, pcb->ipsec_output_service_class);
2746 data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
2747 data->m_pkthdr.pkt_flowid = interface->if_flowhash;
2748 data->m_pkthdr.pkt_proto = ip->ip_p;
2749 data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
2750
2751 /* Flip endian-ness for ip_output */
2752 ip = mtod(data, struct ip *);
2753 NTOHS(ip->ip_len);
2754 NTOHS(ip->ip_off);
2755
2756 /* Increment statistics */
2757 length = mbuf_pkthdr_len(data);
2758 ifnet_stat_increment_out(interface, 1, length, 0);
2759
2760 /* Send to ip_output */
2761 memset(&ro, 0, sizeof(ro));
2762
2763 flags = (IP_OUTARGS | /* Passing out args to specify interface */
2764 IP_NOIPSEC); /* To ensure the packet doesn't go through ipsec twice */
2765
2766 memset(&ipoa, 0, sizeof(ipoa));
2767 ipoa.ipoa_flowadv.code = 0;
2768 ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
2769 if (ipsec_state.outgoing_if) {
2770 ipoa.ipoa_boundif = ipsec_state.outgoing_if;
2771 ipoa.ipoa_flags |= IPOAF_BOUND_IF;
2772 }
2773 ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);
2774
2775 adv = &ipoa.ipoa_flowadv;
2776
2777 (void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
2778 data = NULL;
2779
2780 if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
2781 error = ENOBUFS;
2782 ifnet_disable_output(interface);
2783 }
2784
2785 goto done;
2786 }
2787 case 6: {
2788 data = ipsec6_splithdr(data);
2789 if (data == NULL) {
2790 printf("ipsec_output: ipsec6_splithdr returned NULL\n");
2791 goto ipsec_output_err;
2792 }
2793
2794 ip6 = mtod(data, struct ip6_hdr *);
2795
2796 memset(&ipsec_state, 0, sizeof(ipsec_state));
2797 ipsec_state.m = data;
2798 ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
2799 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
2800
2801 error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
2802 if (error == 0 && ipsec_state.tunneled == 4) { /* tunneled in IPv4 - packet is gone */
2803 goto done;
2804 }
2805 data = ipsec_state.m;
2806 if (error || data == NULL) {
2807 if (error) {
2808 printf("ipsec_output: ipsec6_output error %d\n", error);
2809 }
2810 goto ipsec_output_err;
2811 }
2812
2813 /* Set traffic class, set flow */
2814 m_set_service_class(data, pcb->ipsec_output_service_class);
2815 data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
2816 data->m_pkthdr.pkt_flowid = interface->if_flowhash;
2817 data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
2818 data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
2819
2820 /* Increment statistics */
2821 length = mbuf_pkthdr_len(data);
2822 ifnet_stat_increment_out(interface, 1, length, 0);
2823
2824 /* Send to ip6_output */
2825 memset(&ro6, 0, sizeof(ro6));
2826
2827 flags = IPV6_OUTARGS;
2828
2829 memset(&ip6oa, 0, sizeof(ip6oa));
2830 ip6oa.ip6oa_flowadv.code = 0;
2831 ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
2832 if (ipsec_state.outgoing_if) {
2833 ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
2834 ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
2835 }
2836 ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);
2837
2838 adv = &ip6oa.ip6oa_flowadv;
2839
2840 (void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
2841 data = NULL;
2842
2843 if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
2844 error = ENOBUFS;
2845 ifnet_disable_output(interface);
2846 }
2847
2848 goto done;
2849 }
2850 default: {
2851 printf("ipsec_output: Received unknown packet version %d.\n", ip_version);
2852 error = EINVAL;
2853 goto ipsec_output_err;
2854 }
2855 }
2856
2857 done:
2858 return error;
2859
2860 ipsec_output_err:
2861 if (data)
2862 mbuf_freem(data);
2863 goto done;
2864 }
2865
2866 #if !IPSEC_NEXUS
2867 static void
2868 ipsec_start(ifnet_t interface)
2869 {
2870 mbuf_t data;
2871 struct ipsec_pcb *pcb = ifnet_softc(interface);
2872
2873 VERIFY(pcb != NULL);
2874 for (;;) {
2875 if (ifnet_dequeue(interface, &data) != 0)
2876 break;
2877 if (ipsec_output(interface, data) != 0)
2878 break;
2879 }
2880 }
2881 #endif // !IPSEC_NEXUS
2882
2883 /* Network Interface functions */
2884 static errno_t
2885 ipsec_demux(__unused ifnet_t interface,
2886 mbuf_t data,
2887 __unused char *frame_header,
2888 protocol_family_t *protocol)
2889 {
2890 struct ip *ip;
2891 u_int ip_version;
2892
2893 while (data != NULL && mbuf_len(data) < 1) {
2894 data = mbuf_next(data);
2895 }
2896
2897 if (data == NULL)
2898 return ENOENT;
2899
2900 ip = mtod(data, struct ip *);
2901 ip_version = ip->ip_v;
2902
2903 switch(ip_version) {
2904 case 4:
2905 *protocol = PF_INET;
2906 return 0;
2907 case 6:
2908 *protocol = PF_INET6;
2909 return 0;
2910 default:
2911 break;
2912 }
2913
2914 return 0;
2915 }
2916
2917 static errno_t
2918 ipsec_add_proto(__unused ifnet_t interface,
2919 protocol_family_t protocol,
2920 __unused const struct ifnet_demux_desc *demux_array,
2921 __unused u_int32_t demux_count)
2922 {
2923 switch(protocol) {
2924 case PF_INET:
2925 return 0;
2926 case PF_INET6:
2927 return 0;
2928 default:
2929 break;
2930 }
2931
2932 return ENOPROTOOPT;
2933 }
2934
2935 static errno_t
2936 ipsec_del_proto(__unused ifnet_t interface,
2937 __unused protocol_family_t protocol)
2938 {
2939 return 0;
2940 }
2941
2942 static errno_t
2943 ipsec_ioctl(ifnet_t interface,
2944 u_long command,
2945 void *data)
2946 {
2947 errno_t result = 0;
2948
2949 switch(command) {
2950 case SIOCSIFMTU:
2951 #if IPSEC_NEXUS
2952 // Make sure we can fit packets in the channel buffers
2953 if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > IPSEC_IF_DEFAULT_SLOT_SIZE) {
2954 ifnet_set_mtu(interface, IPSEC_IF_DEFAULT_SLOT_SIZE);
2955 break;
2956 }
2957 #endif // IPSEC_NEXUS
2958 ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
2959 break;
2960
2961 case SIOCSIFFLAGS:
2962 /* ifioctl() takes care of it */
2963 break;
2964
2965 default:
2966 result = EOPNOTSUPP;
2967 }
2968
2969 return result;
2970 }
2971
2972 static void
2973 ipsec_detached(ifnet_t interface)
2974 {
2975 struct ipsec_pcb *pcb = ifnet_softc(interface);
2976 (void)ifnet_release(interface);
2977 ipsec_free_pcb(pcb);
2978 }
2979
2980 /* Protocol Handlers */
2981
2982 static errno_t
2983 ipsec_proto_input(ifnet_t interface,
2984 protocol_family_t protocol,
2985 mbuf_t m,
2986 __unused char *frame_header)
2987 {
2988 mbuf_pkthdr_setrcvif(m, interface);
2989 pktap_input(interface, protocol, m, NULL);
2990
2991 if (proto_input(protocol, m) != 0) {
2992 ifnet_stat_increment_in(interface, 0, 0, 1);
2993 m_freem(m);
2994 } else {
2995 ifnet_stat_increment_in(interface, 1, m->m_pkthdr.len, 0);
2996 }
2997
2998 return 0;
2999 }
3000
3001 static errno_t
3002 ipsec_proto_pre_output(__unused ifnet_t interface,
3003 protocol_family_t protocol,
3004 __unused mbuf_t *packet,
3005 __unused const struct sockaddr *dest,
3006 __unused void *route,
3007 __unused char *frame_type,
3008 __unused char *link_layer_dest)
3009 {
3010
3011 *(protocol_family_t *)(void *)frame_type = protocol;
3012 return 0;
3013 }
3014
3015 static errno_t
3016 ipsec_attach_proto(ifnet_t interface,
3017 protocol_family_t protocol)
3018 {
3019 struct ifnet_attach_proto_param proto;
3020 errno_t result;
3021
3022 bzero(&proto, sizeof(proto));
3023 proto.input = ipsec_proto_input;
3024 proto.pre_output = ipsec_proto_pre_output;
3025
3026 result = ifnet_attach_protocol(interface, protocol, &proto);
3027 if (result != 0 && result != EEXIST) {
3028 printf("ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
3029 protocol, result);
3030 }
3031
3032 return result;
3033 }
3034
3035 #if IPSEC_NEXUS
3036 errno_t
3037 ipsec_inject_inbound_packet(ifnet_t interface,
3038 mbuf_t packet)
3039 {
3040 struct ipsec_pcb *pcb = ifnet_softc(interface);
3041
3042 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3043
3044 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
3045 if (pcb->ipsec_input_chain != NULL) {
3046 pcb->ipsec_input_chain_last->m_nextpkt = packet;
3047 } else {
3048 pcb->ipsec_input_chain = packet;
3049 }
3050 while (packet->m_nextpkt) {
3051 VERIFY(packet != packet->m_nextpkt);
3052 packet = packet->m_nextpkt;
3053 }
3054 pcb->ipsec_input_chain_last = packet;
3055 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
3056
3057 kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring;
3058 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3059
3060 if (rx_ring != NULL) {
3061 kern_channel_notify(rx_ring, 0);
3062 }
3063
3064 return (0);
3065 }
3066 #else // IPSEC_NEXUS
3067 errno_t
3068 ipsec_inject_inbound_packet(ifnet_t interface,
3069 mbuf_t packet)
3070 {
3071 errno_t error;
3072 protocol_family_t protocol;
3073 if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
3074 return error;
3075 }
3076
3077 return ipsec_proto_input(interface, protocol, packet, NULL);
3078 }
3079 #endif // IPSEC_NEXUS
3080
3081 void
3082 ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family)
3083 {
3084 if (packet != NULL && interface != NULL) {
3085 struct ipsec_pcb *pcb = ifnet_softc(interface);
3086 if (pcb != NULL) {
3087 /* Set traffic class, set flow */
3088 m_set_service_class(packet, pcb->ipsec_output_service_class);
3089 packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
3090 packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
3091 if (family == AF_INET) {
3092 struct ip *ip = mtod(packet, struct ip *);
3093 packet->m_pkthdr.pkt_proto = ip->ip_p;
3094 } else if (family == AF_INET6) {
3095 struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
3096 packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
3097 }
3098 packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
3099 }
3100 }
3101 }
3102
3103 void
3104 ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
3105 {
3106 struct ipsec_pcb *pcb;
3107
3108 if (interface == NULL || ipoa == NULL)
3109 return;
3110 pcb = ifnet_softc(interface);
3111
3112 if (net_qos_policy_restricted == 0) {
3113 ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
3114 ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
3115 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
3116 net_qos_policy_restrict_avapps != 0) {
3117 ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
3118 } else {
3119 ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED;
3120 ipoa->ipoa_sotc = SO_TC_VO;
3121 }
3122 }
3123
3124 void
3125 ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
3126 {
3127 struct ipsec_pcb *pcb;
3128
3129 if (interface == NULL || ip6oa == NULL)
3130 return;
3131 pcb = ifnet_softc(interface);
3132
3133 if (net_qos_policy_restricted == 0) {
3134 ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED;
3135 ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
3136 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
3137 net_qos_policy_restrict_avapps != 0) {
3138 ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
3139 } else {
3140 ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
3141 ip6oa->ip6oa_sotc = SO_TC_VO;
3142 }
3143 }