]> git.saurik.com Git - apple/xnu.git/blame_incremental - bsd/net/if_ipsec.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / net / if_ipsec.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2012-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29
30#include <sys/systm.h>
31#include <sys/kern_control.h>
32#include <net/kpi_protocol.h>
33#include <net/kpi_interface.h>
34#include <sys/socket.h>
35#include <sys/socketvar.h>
36#include <net/if.h>
37#include <net/if_types.h>
38#include <net/bpf.h>
39#include <net/if_ipsec.h>
40#include <sys/mbuf.h>
41#include <sys/sockio.h>
42#include <netinet/in.h>
43#include <netinet/ip6.h>
44#include <netinet6/in6_var.h>
45#include <netinet6/ip6_var.h>
46#include <sys/kauth.h>
47#include <netinet6/ipsec.h>
48#include <netinet6/ipsec6.h>
49#include <netinet6/esp.h>
50#include <netinet6/esp6.h>
51#include <netinet/ip.h>
52#include <net/flowadv.h>
53#include <net/necp.h>
54#include <netkey/key.h>
55#include <net/pktap.h>
56#include <kern/zalloc.h>
57#include <os/log.h>
58
/* Master switch for the Skywalk nexus data path; 0 here, so all
 * #if IPSEC_NEXUS sections below are compiled out in this build. */
#define IPSEC_NEXUS 0

extern int net_qos_policy_restricted;
extern int net_qos_policy_restrict_avapps;

/* Kernel Control functions */
static errno_t ipsec_ctl_setup(u_int32_t *unit, void **unitinfo);
static errno_t ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo);
static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo);
static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
    void *unitinfo);
static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
    void *unitinfo, mbuf_t m, int flags);
static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
    int opt, void *data, size_t *len);
static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
    int opt, void *data, size_t len);

/* Network Interface functions */
static void ipsec_start(ifnet_t interface);
static errno_t ipsec_output(ifnet_t interface, mbuf_t data);
static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
    protocol_family_t *protocol);
static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
    const struct ifnet_demux_desc *demux_array,
    u_int32_t demux_count);
static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
static void ipsec_detached(ifnet_t interface);

/* Protocol handlers */
static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
    mbuf_t m, char *frame_header);
static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route,
    char *frame_type, char *link_layer_dest);

/* Driver-wide state, initialized once in ipsec_register_control(). */
static kern_ctl_ref ipsec_kctlref;
static lck_attr_t *ipsec_lck_attr;
static lck_grp_attr_t *ipsec_lck_grp_attr;
static lck_grp_t *ipsec_lck_grp;
static lck_mtx_t ipsec_lock;

#if IPSEC_NEXUS

SYSCTL_DECL(_net_ipsec);
SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
/* When non-zero, IPSEC_IF_VERIFY() assertions fire during interface setup. */
static int if_ipsec_verify_interface_creation = 0;
SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");

#define IPSEC_IF_VERIFY(_e) if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }

/* Defaults for slot/ring geometry; runtime values are sysctl-tunable below. */
#define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
#define IPSEC_IF_DEFAULT_RING_SIZE 64
#define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
#define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
#define IPSEC_IF_DEFAULT_BUF_SEG_SIZE skmem_usr_buf_seg_size

/* One kpipe/TX ring per WMM access category when in WMM mode. */
#define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
#define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
#define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
#define IPSEC_NETIF_WMM_RX_RING_COUNT 1
#define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
#define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT

/* Bounds enforced by the ring-size sysctl handlers below. */
#define IPSEC_IF_MIN_RING_SIZE 8
#define IPSEC_IF_MAX_RING_SIZE 1024

#define IPSEC_IF_MIN_SLOT_SIZE 1024
#define IPSEC_IF_MAX_SLOT_SIZE 4096

#define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512

static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT;

static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;

static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;

SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");

static int if_ipsec_debug = 0;
SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "");

static errno_t
ipsec_register_nexus(void);

/* UUID handles for the nexus providers/instances backing one interface. */
typedef struct ipsec_nx {
	uuid_t if_provider;
	uuid_t if_instance;
	uuid_t fsw_provider;
	uuid_t fsw_instance;
	uuid_t fsw_device;
	uuid_t fsw_host;
	uuid_t fsw_agent;
} *ipsec_nx_t;

static nexus_controller_t ipsec_ncd;
static int ipsec_ncd_refcount;
static uuid_t ipsec_kpipe_uuid;

#endif // IPSEC_NEXUS
174
/* Control block allocated for each kernel control connection; one per
 * ipsec interface.  Allocated from ipsec_pcb_zone (zeroed on free). */
struct ipsec_pcb {
	TAILQ_ENTRY(ipsec_pcb) ipsec_chain;     // linkage on the global ipsec_head list
	kern_ctl_ref ipsec_ctlref;              // kernel-control handle for this connection
	ifnet_t ipsec_ifp;                      // attached network interface
	u_int32_t ipsec_unit;                   // control unit; reset to 0 on ctl disconnect (see ipsec_interface_isvalid)
	u_int32_t ipsec_unique_id;
	// These external flags can be set with IPSEC_OPT_FLAGS
	u_int32_t ipsec_external_flags;
	// These internal flags are only used within this driver
	u_int32_t ipsec_internal_flags;         // e.g. IPSEC_FLAGS_KPIPE_ALLOCATED
	u_int32_t ipsec_input_frag_size;
	bool ipsec_frag_size_set;
	int ipsec_ext_ifdata_stats;             // non-zero: caller maintains ifnet stats externally
	mbuf_svc_class_t ipsec_output_service_class;
	char ipsec_if_xname[IFXNAMSIZ];         // interface name used in log messages
	char ipsec_unique_name[IFXNAMSIZ];
	// PCB lock protects state fields, like ipsec_kpipe_count
	decl_lck_rw_data(, ipsec_pcb_lock);
	// lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
	decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
	u_int32_t ipsec_pcb_data_move; /* number of data moving contexts */
	u_int32_t ipsec_pcb_drainers; /* number of threads waiting to drain */
	u_int32_t ipsec_pcb_data_path_state; /* internal state of interface data path */

#if IPSEC_NEXUS
	lck_mtx_t ipsec_input_chain_lock;
	lck_mtx_t ipsec_kpipe_encrypt_lock;     // serializes ipsec_encrypt_mbuf() on the kpipe path
	lck_mtx_t ipsec_kpipe_decrypt_lock;
	struct mbuf * ipsec_input_chain;
	struct mbuf * ipsec_input_chain_last;
	u_int32_t ipsec_input_chain_count;
	// Input chain lock protects the list of input mbufs
	// The input chain lock must be taken AFTER the PCB lock if both are held
	struct ipsec_nx ipsec_nx;
	u_int32_t ipsec_kpipe_count;            // active kpipes; equals IPSEC_IF_WMM_RING_COUNT in WMM mode
	pid_t ipsec_kpipe_pid;
	uuid_t ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];
	// ring tables below are registered/cleared in the ring init/fini callbacks
	void * ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];
	void * ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];
	kern_pbufpool_t ipsec_kpipe_pp;
	u_int32_t ipsec_kpipe_tx_ring_size;
	u_int32_t ipsec_kpipe_rx_ring_size;

	kern_nexus_t ipsec_netif_nexus;         // set in ipsec_netif_prepare, cleared on disconnect
	kern_pbufpool_t ipsec_netif_pp;
	void * ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
	void * ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
	uint64_t ipsec_netif_txring_size;

	u_int32_t ipsec_slot_size;              // caps per-packet copy length in the kpipe sync paths
	u_int32_t ipsec_netif_ring_size;
	u_int32_t ipsec_tx_fsw_ring_size;
	u_int32_t ipsec_rx_fsw_ring_size;
	bool ipsec_use_netif;
	bool ipsec_needs_netagent;
#endif // IPSEC_NEXUS
};
233
/* These are internal flags not exposed outside this file */
#define IPSEC_FLAGS_KPIPE_ALLOCATED 1

/* data movement refcounting functions */
static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);

/* Data path states */
#define IPSEC_PCB_DATA_PATH_READY 0x1

/* Macros to set/clear/test data path states.  Callers serialize on
 * ipsec_pcb_data_move_lock (see ipsec_nexus_connected). */
#define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
#define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
#define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)
248#if IPSEC_NEXUS
249/* Macros to clear/set/test flags. */
250static inline void
251ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag)
252{
253 pcb->ipsec_internal_flags |= flag;
254}
255static inline void
256ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag)
257{
258 pcb->ipsec_internal_flags &= ~flag;
259}
260
261static inline bool
262ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag)
263{
264 return !!(pcb->ipsec_internal_flags & flag);
265}
266#endif // IPSEC_NEXUS
267
/* Global list of all ipsec PCBs.  NOTE(review): presumably guarded by
 * ipsec_lock (initialized alongside it in ipsec_register_control) —
 * confirm against the list-manipulation sites. */
TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;

/* Allocation zone for ipsec_pcb; ZC_ZFREE_CLEARMEM zeroes blocks on free. */
static ZONE_DECLARE(ipsec_pcb_zone, "net.if_ipsec",
    sizeof(struct ipsec_pcb), ZC_ZFREE_CLEARMEM);

#define IPSECQ_MAXLEN 256
274
275#if IPSEC_NEXUS
276static int
277sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
278{
279#pragma unused(arg1, arg2)
280 int value = if_ipsec_ring_size;
281
282 int error = sysctl_handle_int(oidp, &value, 0, req);
283 if (error || !req->newptr) {
284 return error;
285 }
286
287 if (value < IPSEC_IF_MIN_RING_SIZE ||
288 value > IPSEC_IF_MAX_RING_SIZE) {
289 return EINVAL;
290 }
291
292 if_ipsec_ring_size = value;
293
294 return 0;
295}
296
297static int
298sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
299{
300#pragma unused(arg1, arg2)
301 int value = if_ipsec_tx_fsw_ring_size;
302
303 int error = sysctl_handle_int(oidp, &value, 0, req);
304 if (error || !req->newptr) {
305 return error;
306 }
307
308 if (value < IPSEC_IF_MIN_RING_SIZE ||
309 value > IPSEC_IF_MAX_RING_SIZE) {
310 return EINVAL;
311 }
312
313 if_ipsec_tx_fsw_ring_size = value;
314
315 return 0;
316}
317
318static int
319sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
320{
321#pragma unused(arg1, arg2)
322 int value = if_ipsec_rx_fsw_ring_size;
323
324 int error = sysctl_handle_int(oidp, &value, 0, req);
325 if (error || !req->newptr) {
326 return error;
327 }
328
329 if (value < IPSEC_IF_MIN_RING_SIZE ||
330 value > IPSEC_IF_MAX_RING_SIZE) {
331 return EINVAL;
332 }
333
334 if_ipsec_rx_fsw_ring_size = value;
335
336 return 0;
337}
338
339
340static inline bool
341ipsec_in_wmm_mode(struct ipsec_pcb *pcb)
342{
343 return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT;
344}
345
346#endif // IPSEC_NEXUS
347
/*
 * One-time driver initialization.
 *
 * Registers the ipsec kernel-control socket (IPSEC_CONTROL_NAME), plumbs
 * PF_INET and PF_INET6 protocol support for IFNET_FAMILY_IPSEC interfaces,
 * and initializes the global PCB list and lock.  On a registration failure
 * it undoes the registrations made so far and returns that error; returns
 * 0 on success.
 */
errno_t
ipsec_register_control(void)
{
	struct kern_ctl_reg kern_ctl;
	errno_t result = 0;

#if IPSEC_NEXUS
	/* NOTE(review): return value deliberately ignored — a nexus
	 * registration failure is logged inside ipsec_register_nexus()
	 * but does not prevent control registration. */
	ipsec_register_nexus();
#endif // IPSEC_NEXUS

	TAILQ_INIT(&ipsec_head);

	/* Describe the kernel control: privileged (root-only), with setup
	 * callback support and 64 KiB socket buffers in each direction. */
	bzero(&kern_ctl, sizeof(kern_ctl));
	strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
	kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP; /* Require root */
	kern_ctl.ctl_sendsize = 64 * 1024;
	kern_ctl.ctl_recvsize = 64 * 1024;
	kern_ctl.ctl_setup = ipsec_ctl_setup;
	kern_ctl.ctl_bind = ipsec_ctl_bind;
	kern_ctl.ctl_connect = ipsec_ctl_connect;
	kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
	kern_ctl.ctl_send = ipsec_ctl_send;
	kern_ctl.ctl_setopt = ipsec_ctl_setopt;
	kern_ctl.ctl_getopt = ipsec_ctl_getopt;

	result = ctl_register(&kern_ctl, &ipsec_kctlref);
	if (result != 0) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n", result);
		return result;
	}

	/* Register the protocol plumbers */
	if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC,
	    ipsec_attach_proto, NULL)) != 0) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
		    result);
		/* Roll back the control registration. */
		ctl_deregister(ipsec_kctlref);
		return result;
	}

	/* Register the protocol plumbers */
	if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC,
	    ipsec_attach_proto, NULL)) != 0) {
		/* Roll back both earlier registrations. */
		proto_unregister_plumber(PF_INET, IFNET_FAMILY_IPSEC);
		ctl_deregister(ipsec_kctlref);
		os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
		    result);
		return result;
	}

	/* Set up the global lock used with the PCB list. */
	ipsec_lck_attr = lck_attr_alloc_init();
	ipsec_lck_grp_attr = lck_grp_attr_alloc_init();
	ipsec_lck_grp = lck_grp_alloc_init("ipsec", ipsec_lck_grp_attr);
	lck_mtx_init(&ipsec_lock, ipsec_lck_grp, ipsec_lck_attr);

	return 0;
}
406
407/* Helpers */
408int
409ipsec_interface_isvalid(ifnet_t interface)
410{
411 struct ipsec_pcb *pcb = NULL;
412
413 if (interface == NULL) {
414 return 0;
415 }
416
417 pcb = ifnet_softc(interface);
418
419 if (pcb == NULL) {
420 return 0;
421 }
422
423 /* When ctl disconnects, ipsec_unit is set to 0 */
424 if (pcb->ipsec_unit == 0) {
425 return 0;
426 }
427
428 return 1;
429}
430
431#if IPSEC_NEXUS
432boolean_t
433ipsec_interface_needs_netagent(ifnet_t interface)
434{
435 struct ipsec_pcb *pcb = NULL;
436
437 if (interface == NULL) {
438 return FALSE;
439 }
440
441 pcb = ifnet_softc(interface);
442
443 if (pcb == NULL) {
444 return FALSE;
445 }
446
447 return pcb->ipsec_needs_netagent == true;
448}
449#endif // IPSEC_NEXUS
450
/*
 * Apply the standard link attributes to an ipsec ifnet: default MTU of
 * 1500, up/multicast/point-to-point flags, and opt-out of automatic IPv6
 * link-local configuration.  Always returns 0.
 */
static errno_t
ipsec_ifnet_set_attrs(ifnet_t ifp)
{
	/* Set flags and additional information. */
	ifnet_set_mtu(ifp, 1500);
	ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);

	/* The interface must generate its own IPv6 LinkLocal address,
	 * if possible following the recommendation of RFC2472 to the 64bit interface ID
	 */
	ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);

#if !IPSEC_NEXUS
	/* Reset the stats in case as the interface may have been recycled */
	struct ifnet_stats_param stats;
	bzero(&stats, sizeof(struct ifnet_stats_param));
	ifnet_set_stat(ifp, &stats);
#endif // !IPSEC_NEXUS

	return 0;
}
472
473#if IPSEC_NEXUS
474
475static uuid_t ipsec_nx_dom_prov;
476
477static errno_t
478ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
479{
480 return 0;
481}
482
483static void
484ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
485{
486 // Ignore
487}
488
489static errno_t
490ipsec_register_nexus(void)
491{
492 const struct kern_nexus_domain_provider_init dp_init = {
493 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
494 .nxdpi_flags = 0,
495 .nxdpi_init = ipsec_nxdp_init,
496 .nxdpi_fini = ipsec_nxdp_fini
497 };
498 errno_t err = 0;
499
500 /* ipsec_nxdp_init() is called before this function returns */
501 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
502 (const uint8_t *) "com.apple.ipsec",
503 &dp_init, sizeof(dp_init),
504 &ipsec_nx_dom_prov);
505 if (err != 0) {
506 os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
507 return err;
508 }
509 return 0;
510}
511
512static errno_t
513ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
514{
515 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
516 pcb->ipsec_netif_nexus = nexus;
517 return ipsec_ifnet_set_attrs(ifp);
518}
519
520static errno_t
521ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
522 proc_t p, kern_nexus_t nexus,
523 nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
524{
525#pragma unused(nxprov, p)
526#pragma unused(nexus, nexus_port, channel, ch_ctx)
527 return 0;
528}
529
530static errno_t
531ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
532 kern_channel_t channel)
533{
534#pragma unused(nxprov, channel)
535 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
536 boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
537 /* Mark the data path as ready */
538 if (ok) {
539 lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
540 IPSEC_SET_DATA_PATH_READY(pcb);
541 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
542 }
543 return ok ? 0 : ENXIO;
544}
545
546static void
547ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
548 kern_channel_t channel)
549{
550#pragma unused(nxprov, channel)
551 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
552
553 VERIFY(pcb->ipsec_kpipe_count != 0);
554
555 /* Wait until all threads in the data paths are done. */
556 ipsec_wait_data_move_drain(pcb);
557}
558
559static void
560ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
561 kern_channel_t channel)
562{
563#pragma unused(nxprov, channel)
564 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
565
566 /* Wait until all threads in the data paths are done. */
567 ipsec_wait_data_move_drain(pcb);
568}
569
570static void
571ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
572 kern_channel_t channel)
573{
574#pragma unused(nxprov, channel)
575 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
576 if (pcb->ipsec_netif_nexus == nexus) {
577 pcb->ipsec_netif_nexus = NULL;
578 }
579 ifnet_decr_iorefcnt(pcb->ipsec_ifp);
580}
581
/*
 * Ring-init callback for a kpipe channel ring.
 *
 * Identifies which kpipe instance this channel belongs to by matching the
 * channel's nexus UUID against the PCB's per-ring UUID table, stores that
 * index as the ring context, and records the ring pointer in the matching
 * rx/tx table slot so the sync handlers can find it.  Returns ENOENT when
 * no UUID matches.
 */
static errno_t
ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
#pragma unused(nxprov)
#pragma unused(channel)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	uint8_t ring_idx;

	/* Find the kpipe slot whose UUID matches this channel. */
	for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
		if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->ipsec_kpipe_uuid[ring_idx])) {
			break;
		}
	}

	if (ring_idx == pcb->ipsec_kpipe_count) {
		uuid_string_t uuidstr;
		uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
		os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->ipsec_if_xname, uuidstr);
		return ENOENT;
	}

	/* Stash the index in the ring context for the sync callbacks. */
	*ring_ctx = (void *)(uintptr_t)ring_idx;

	/* Each table slot may be claimed only once per ring lifetime. */
	if (!is_tx_ring) {
		VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL);
		pcb->ipsec_kpipe_rxring[ring_idx] = ring;
	} else {
		VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL);
		pcb->ipsec_kpipe_txring[ring_idx] = ring;
	}
	return 0;
}
616
617static void
618ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
619 kern_channel_ring_t ring)
620{
621#pragma unused(nxprov)
622 bool found = false;
623 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
624
625 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
626 if (pcb->ipsec_kpipe_rxring[i] == ring) {
627 pcb->ipsec_kpipe_rxring[i] = NULL;
628 found = true;
629 } else if (pcb->ipsec_kpipe_txring[i] == ring) {
630 pcb->ipsec_kpipe_txring[i] = NULL;
631 found = true;
632 }
633 }
634 VERIFY(found);
635}
636
/*
 * TX sync handler for a kpipe channel ring.
 *
 * The kpipe TX path performs no processing of its own here: if userspace
 * has written anything into the TX ring, this handler simply kicks the
 * netif RX ring so the data gets pulled.  Always returns 0, including
 * when the data path is stopped or no kpipe is allocated.
 */
static errno_t
ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	/* Take a data-move reference; bail if the data path is stopping. */
	if (!ipsec_data_move_begin(pcb)) {
		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	VERIFY(pcb->ipsec_kpipe_count);

	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	if (tx_slot == NULL) {
		// Nothing to write, bail
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	// Signal the netif ring to read
	kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
	/* Drop the PCB lock before notifying outside it. */
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	if (rx_ring != NULL) {
		kern_channel_notify(rx_ring, 0);
	}

	ipsec_data_move_end(pcb);
	return 0;
}
679
680static mbuf_t
681ipsec_encrypt_mbuf(ifnet_t interface,
682 mbuf_t data)
683{
684 struct ipsec_output_state ipsec_state;
685 int error = 0;
686 uint32_t af;
687
688 // Make sure this packet isn't looping through the interface
689 if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
690 error = -1;
691 goto ipsec_output_err;
692 }
693
694 // Mark the interface so NECP can evaluate tunnel policy
695 necp_mark_packet_from_interface(data, interface);
696
697 struct ip *ip = mtod(data, struct ip *);
698 u_int ip_version = ip->ip_v;
699
700 switch (ip_version) {
701 case 4: {
702 af = AF_INET;
703
704 memset(&ipsec_state, 0, sizeof(ipsec_state));
705 ipsec_state.m = data;
706 ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
707 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
708
709 error = ipsec4_interface_output(&ipsec_state, interface);
710 if (error == 0 && ipsec_state.tunneled == 6) {
711 // Tunneled in IPv6 - packet is gone
712 // TODO: Don't lose mbuf
713 data = NULL;
714 goto done;
715 }
716
717 data = ipsec_state.m;
718 if (error || data == NULL) {
719 if (error) {
720 os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec4_output error %d\n", error);
721 }
722 goto ipsec_output_err;
723 }
724 goto done;
725 }
726 case 6: {
727 af = AF_INET6;
728
729 data = ipsec6_splithdr(data);
730 if (data == NULL) {
731 os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
732 goto ipsec_output_err;
733 }
734
735 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
736
737 memset(&ipsec_state, 0, sizeof(ipsec_state));
738 ipsec_state.m = data;
739 ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
740 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
741
742 error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
743 if (error == 0 && ipsec_state.tunneled == 4) {
744 // Tunneled in IPv4 - packet is gone
745 // TODO: Don't lose mbuf
746 data = NULL;
747 goto done;
748 }
749 data = ipsec_state.m;
750 if (error || data == NULL) {
751 if (error) {
752 os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_output error %d\n", error);
753 }
754 goto ipsec_output_err;
755 }
756 goto done;
757 }
758 default: {
759 os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
760 error = -1;
761 goto ipsec_output_err;
762 }
763 }
764
765done:
766 return data;
767
768ipsec_output_err:
769 if (data) {
770 mbuf_freem(data);
771 }
772 return NULL;
773}
774
/*
 * RX sync handler for a kpipe channel ring.
 *
 * Pulls outbound packets from the corresponding netif TX ring, encrypts
 * each one via ipsec_encrypt_mbuf(), and copies the result into freshly
 * allocated packets attached to this kpipe RX ring for the kpipe consumer
 * to read.  Per-slot failures drop that packet; allocation failures stop
 * the batch.  Always returns 0.
 *
 * Lock order in this function: ipsec_pcb_lock (shared) is dropped before
 * kr_enter(tx_ring), then retaken and the ring pointer revalidated.
 */
static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	struct kern_channel_ring_stat_increment rx_ring_stats;
	/* Ring index was stashed as the ring context in ipsec_kpipe_ring_init(). */
	uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);

	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	VERIFY(pcb->ipsec_kpipe_count);
	/* NOTE(review): `<=` permits ring_idx == kpipe_count, one past the
	 * last valid index; ring_idx is always < kpipe_count in practice
	 * (set in ipsec_kpipe_ring_init) — confirm whether `<` was meant. */
	VERIFY(ring_idx <= pcb->ipsec_kpipe_count);

	// Reclaim user-released slots
	(void) kern_channel_reclaim(rx_ring);

	uint32_t avail = kern_channel_available_slot_count(rx_ring);
	if (avail == 0) {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
		ipsec_data_move_end(pcb);
		return 0;
	}

	kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
	if (tx_ring == NULL) {
		// Net-If TX ring not set up yet, nothing to read
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
		ipsec_data_move_end(pcb);
		return 0;
	}

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;

	// Unlock ipsec before entering ring
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	(void)kr_enter(tx_ring, TRUE);

	// Lock again after entering and validate
	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
		// Ring no longer valid
		// Unlock first, then exit ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(tx_ring);
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
		    pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
		ipsec_data_move_end(pcb);
		return 0;
	}

	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	if (tx_slot == NULL) {
		// Nothing to read, don't bother signalling
		// Unlock first, then exit ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(tx_ring);
		ipsec_data_move_end(pcb);
		return 0;
	}

	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
	VERIFY(rx_pp != NULL);
	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
	kern_channel_slot_t rx_pslot = NULL;
	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	/* Main copy loop: one netif TX slot -> one encrypted kpipe RX slot. */
	while (rx_slot != NULL && tx_slot != NULL) {
		size_t length = 0;
		mbuf_t data = NULL;
		errno_t error = 0;

		// Allocate rx packet
		kern_packet_t rx_ph = 0;
		error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
		if (__improbable(error != 0)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
			    pcb->ipsec_ifp->if_xname);
			break;
		}

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		/* Empty TX slot: release the RX packet and move on. */
		if (tx_ph == 0) {
			kern_pbufpool_free(rx_pp, rx_ph);
			continue;
		}

		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);
		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
		VERIFY(tx_baddr != NULL);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		/* Tap the cleartext packet for bpf before encryption. */
		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		/* Cap the copy at the configured slot size. */
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		// Increment TX stats
		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;

		if (length > 0) {
			/* Copy into an mbuf, then encrypt under the kpipe encrypt lock. */
			error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					// Encrypt and send packet
					lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
					data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
					lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
				} else {
					os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROP);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
		}

		/* Encryption failed or dropped the packet: stop this batch. */
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
			kern_pbufpool_free(rx_pp, rx_ph);
			break;
		}

		/* Encrypted packet must fit in a single RX buflet. */
		length = mbuf_pkthdr_len(data);
		if (length > rx_pp->pp_buflet_size) {
			// Flush data
			mbuf_freem(data);
			kern_pbufpool_free(rx_pp, rx_ph);
			os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
			    pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
			continue;
		}

		// Fillout rx packet
		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
		VERIFY(rx_buf != NULL);
		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
		VERIFY(rx_baddr != NULL);

		// Copy-in data from mbuf to buflet
		mbuf_copydata(data, 0, length, (void *)rx_baddr);
		kern_packet_clear_flow_uuid(rx_ph); // Zero flow id

		// Finalize and attach the packet
		error = kern_buflet_set_data_offset(rx_buf, 0);
		VERIFY(error == 0);
		error = kern_buflet_set_data_length(rx_buf, length);
		VERIFY(error == 0);
		error = kern_packet_finalize(rx_ph);
		VERIFY(error == 0);
		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
		VERIFY(error == 0);

		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);

		rx_ring_stats.kcrsi_slots_transferred++;
		rx_ring_stats.kcrsi_bytes_transferred += length;

		/* Update ifnet counters unless stats are maintained externally. */
		if (!pcb->ipsec_ext_ifdata_stats) {
			ifnet_stat_increment_out(pcb->ipsec_ifp, 1, length, 0);
		}

		mbuf_freem(data);

		rx_pslot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	/* Publish progress and accumulated stats on both rings. */
	if (rx_pslot) {
		kern_channel_advance_slot(rx_ring, rx_pslot);
		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
	}

	if (tx_pslot) {
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	/* always reenable output */
	errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
	if (error != 0) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
	}

	// Unlock first, then exit ring
	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	if (tx_pslot != NULL) {
		kern_channel_notify(tx_ring, 0);
	}
	kr_exit(tx_ring);

	ipsec_data_move_end(pcb);
	return 0;
}
1009
1010static uint8_t
1011ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1012{
1013 switch (svc_class) {
1014 case KPKT_SC_VO: {
1015 return 0;
1016 }
1017 case KPKT_SC_VI: {
1018 return 1;
1019 }
1020 case KPKT_SC_BE: {
1021 return 2;
1022 }
1023 case KPKT_SC_BK: {
1024 return 3;
1025 }
1026 default: {
1027 VERIFY(0);
1028 return 0;
1029 }
1030 }
1031}
1032
1033static errno_t
1034ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1035 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
1036 void **ring_ctx)
1037{
1038#pragma unused(nxprov)
1039#pragma unused(channel)
1040 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1041
1042 if (!is_tx_ring) {
1043 VERIFY(pcb->ipsec_netif_rxring[0] == NULL);
1044 pcb->ipsec_netif_rxring[0] = ring;
1045 } else {
1046 uint8_t ring_idx = 0;
1047 if (ipsec_in_wmm_mode(pcb)) {
1048 int err;
1049 kern_packet_svc_class_t svc_class;
1050 err = kern_channel_get_service_class(ring, &svc_class);
1051 VERIFY(err == 0);
1052 ring_idx = ipsec_find_tx_ring_by_svc(svc_class);
1053 VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT);
1054 }
1055
1056 *ring_ctx = (void *)(uintptr_t)ring_idx;
1057
1058 VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL);
1059 pcb->ipsec_netif_txring[ring_idx] = ring;
1060 }
1061 return 0;
1062}
1063
1064static void
1065ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1066 kern_channel_ring_t ring)
1067{
1068#pragma unused(nxprov)
1069 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1070 bool found = false;
1071
1072 for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) {
1073 if (pcb->ipsec_netif_rxring[i] == ring) {
1074 pcb->ipsec_netif_rxring[i] = NULL;
1075 VERIFY(!found);
1076 found = true;
1077 }
1078 }
1079 for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) {
1080 if (pcb->ipsec_netif_txring[i] == ring) {
1081 pcb->ipsec_netif_txring[i] = NULL;
1082 VERIFY(!found);
1083 found = true;
1084 }
1085 }
1086 VERIFY(found);
1087}
1088
1089static bool
1090ipsec_netif_check_policy(ifnet_t interface, mbuf_t data)
1091{
1092 necp_kernel_policy_result necp_result = 0;
1093 necp_kernel_policy_result_parameter necp_result_parameter = {};
1094 uint32_t necp_matched_policy_id = 0;
1095 struct ip_out_args args4 = { };
1096 struct ip6_out_args args6 = { };
1097
1098 // This packet has been marked with IP level policy, do not mark again.
1099 if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
1100 return true;
1101 }
1102
1103 size_t length = mbuf_pkthdr_len(data);
1104 if (length < sizeof(struct ip)) {
1105 return false;
1106 }
1107
1108 struct ip *ip = mtod(data, struct ip *);
1109 u_int ip_version = ip->ip_v;
1110 switch (ip_version) {
1111 case 4: {
1112 if (interface != NULL) {
1113 args4.ipoa_flags |= IPOAF_BOUND_IF;
1114 args4.ipoa_boundif = interface->if_index;
1115 }
1116 necp_matched_policy_id = necp_ip_output_find_policy_match(data, IP_OUTARGS, &args4, NULL,
1117 &necp_result, &necp_result_parameter);
1118 break;
1119 }
1120 case 6: {
1121 if (interface != NULL) {
1122 args6.ip6oa_flags |= IP6OAF_BOUND_IF;
1123 args6.ip6oa_boundif = interface->if_index;
1124 }
1125 necp_matched_policy_id = necp_ip6_output_find_policy_match(data, IPV6_OUTARGS, &args6, NULL,
1126 &necp_result, &necp_result_parameter);
1127 break;
1128 }
1129 default: {
1130 return false;
1131 }
1132 }
1133
1134 if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
1135 necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
1136 /* Drop and flow divert packets should be blocked at the IP layer */
1137 return false;
1138 }
1139
1140 necp_mark_packet_from_ip(data, necp_matched_policy_id);
1141 return true;
1142}
1143
/*
 * Netif TX sync callback: drain packets queued on the netif TX ring.
 *
 * When a kernel pipe is allocated, the packets are left on the ring and
 * the matching kpipe RX ring is notified instead, so the kpipe consumer
 * pulls them.  Otherwise each packet is copied into an mbuf, checked
 * against NECP policy, and handed to ipsec_output() for encryption and
 * injection into the BSD stack.
 */
static errno_t
ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	// Bail out early if the data path has been stopped (e.g. detach in progress).
	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	struct kern_channel_ring_stat_increment tx_ring_stats;
	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	kern_channel_slot_t tx_pslot = NULL;
	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);

	if (tx_slot == NULL) {
		// Nothing to write, don't bother signalling
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	if (pcb->ipsec_kpipe_count &&
	    ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		// Select the corresponding kpipe rx ring
		uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(tx_ring);
		VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT);
		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	}

	// If we're here, we're injecting into the BSD stack
	while (tx_slot != NULL) {
		size_t length = 0;
		mbuf_t data = NULL;

		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

		// Advance TX ring
		tx_pslot = tx_slot;
		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

		if (tx_ph == 0) {
			// Empty slot, nothing to send.
			continue;
		}

		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
		VERIFY(tx_buf != NULL);
		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
		VERIFY(tx_baddr != 0);
		tx_baddr += kern_buflet_get_data_offset(tx_buf);

		// Tap the outbound (pre-encryption) packet for BPF listeners.
		bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

		// Clamp the copy to the configured slot size.
		length = MIN(kern_packet_get_data_length(tx_ph),
		    pcb->ipsec_slot_size);

		if (length > 0) {
			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
			if (error == 0) {
				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
				if (error == 0) {
					// Mark packet from policy
					uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
					necp_mark_packet_from_ip(data, policy_id);

					// Check policy with NECP
					if (!ipsec_netif_check_policy(pcb->ipsec_ifp, data)) {
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
						STATS_INC(nifs, NETIF_STATS_DROP);
						mbuf_freem(data);
						data = NULL;
					} else {
						// Send through encryption
						// (ipsec_output consumes the mbuf on both success and failure)
						error = ipsec_output(pcb->ipsec_ifp, data);
						if (error != 0) {
							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
						}
					}
				} else {
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROP);
					mbuf_freem(data);
					data = NULL;
				}
			} else {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
		}

		if (data == NULL) {
			// NOTE(review): a single failed packet stops draining here;
			// the remaining slots are retried on the next TX sync.
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
			break;
		}

		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);

		tx_ring_stats.kcrsi_slots_transferred++;
		tx_ring_stats.kcrsi_bytes_transferred += length;
	}

	if (tx_pslot) {
		// Return consumed slots to the ring and account per-ring stats.
		kern_channel_advance_slot(tx_ring, tx_pslot);
		kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
		(void)kern_channel_reclaim(tx_ring);
	}

	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	ipsec_data_move_end(pcb);

	return 0;
}
1283
/*
 * Doorbell handler for a single netif TX ring.
 *
 * Refills the ring (blocking variant — see comment below), then in
 * kpipe mode notifies the paired kpipe RX ring so userspace can drain
 * the packets.  When the TX ring is effectively full in kpipe mode,
 * interface output is disabled; it is re-enabled elsewhere once the
 * kpipe reader frees slots (see ipsec_kpipe_sync_rx).
 */
static errno_t
ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx)
{
#pragma unused(nxprov)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	boolean_t more = false;
	errno_t rc = 0;

	// Async refill is never requested for this driver.
	VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0);

	/*
	 * Refill and sync the ring; we may be racing against another thread doing
	 * an RX sync that also wants to do kr_enter(), and so use the blocking
	 * variant here.
	 */
	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, rc);
	}

	// Enter the ring first, then take the pcb lock, and revalidate the
	// ring pointer — it may have been torn down while we were refilling.
	(void) kr_enter(ring, TRUE);
	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	if (ring != pcb->ipsec_netif_txring[ring_idx]) {
		// ring no longer valid
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		kr_exit(ring);
		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n", __func__,
		    pcb->ipsec_if_xname, ring->ckr_name, ring_idx);
		return ENXIO;
	}

	if (pcb->ipsec_kpipe_count) {
		uint32_t tx_available = kern_channel_available_slot_count(ring);
		if (pcb->ipsec_netif_txring_size > 0 &&
		    tx_available >= pcb->ipsec_netif_txring_size - 1) {
			// No room left in tx ring, disable output for now
			errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
			if (error != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
			}
		}
	}

	if (pcb->ipsec_kpipe_count) {
		kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

		// Unlock while calling notify
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		// Signal the kernel pipe ring to read
		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}
	} else {
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
	}

	kr_exit(ring);

	return 0;
}
1346
1347static errno_t
1348ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1349 kern_channel_ring_t ring, __unused uint32_t flags)
1350{
1351 errno_t ret = 0;
1352 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1353
1354 if (!ipsec_data_move_begin(pcb)) {
1355 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1356 return 0;
1357 }
1358
1359 if (ipsec_in_wmm_mode(pcb)) {
1360 for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) {
1361 kern_channel_ring_t nring = pcb->ipsec_netif_txring[i];
1362 ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, nring, flags, i);
1363 if (ret) {
1364 break;
1365 }
1366 }
1367 } else {
1368 ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, 0);
1369 }
1370
1371 ipsec_data_move_end(pcb);
1372 return ret;
1373}
1374
/*
 * Netif RX sync callback: fill the netif RX ring with decrypted
 * packets for delivery up the stack.  Two sources are drained:
 *
 *  1. The legacy input chain (pcb->ipsec_input_chain) of mbufs already
 *     decrypted through the BSD path; oversized packets are fragmented
 *     and re-queued.
 *  2. In kpipe mode, the kpipe TX rings: each packet is copied into an
 *     mbuf, decrypted via esp4/esp6 input, and copied into an RX slot.
 *
 * Locking: holds ipsec_pcb_lock shared throughout, dropping it only
 * around kr_enter()/kern_channel_notify() on the kpipe rings; the
 * input chain and kpipe decrypt state have their own mutexes.
 */
static errno_t
ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
	struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
	struct kern_channel_ring_stat_increment rx_ring_stats;

	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

	// Bail out early if the data path has been stopped.
	if (!ipsec_data_move_begin(pcb)) {
		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
		return 0;
	}

	lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

	// Reclaim user-released slots
	(void) kern_channel_reclaim(rx_ring);

	STATS_INC(nifs, NETIF_STATS_RX_SYNC);

	uint32_t avail = kern_channel_available_slot_count(rx_ring);
	if (avail == 0) {
		// No free RX slots to fill.
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		ipsec_data_move_end(pcb);
		return 0;
	}

	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
	VERIFY(rx_pp != NULL);
	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
	kern_channel_slot_t rx_pslot = NULL;
	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

	// Phase 1: drain the legacy (already-decrypted) input mbuf chain.
	while (rx_slot != NULL) {
		// Check for a waiting packet
		lck_mtx_lock(&pcb->ipsec_input_chain_lock);
		mbuf_t data = pcb->ipsec_input_chain;
		if (data == NULL) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			break;
		}

		// Allocate rx packet
		kern_packet_t rx_ph = 0;
		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
		if (__improbable(error != 0)) {
			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
			STATS_INC(nifs, NETIF_STATS_DROP);
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			break;
		}

		// Advance waiting packets
		if (pcb->ipsec_input_chain_count > 0) {
			pcb->ipsec_input_chain_count--;
		}
		pcb->ipsec_input_chain = data->m_nextpkt;
		data->m_nextpkt = NULL;
		if (pcb->ipsec_input_chain == NULL) {
			pcb->ipsec_input_chain_last = NULL;
		}
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		size_t length = mbuf_pkthdr_len(data);

		if (length < sizeof(struct ip)) {
			// Flush data
			mbuf_freem(data);
			kern_pbufpool_free(rx_pp, rx_ph);
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
			    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
			continue;
		}

		// Determine the address family from the IP version nibble.
		uint32_t af = 0;
		struct ip *ip = mtod(data, struct ip *);
		u_int ip_version = ip->ip_v;
		switch (ip_version) {
		case 4: {
			af = AF_INET;
			break;
		}
		case 6: {
			af = AF_INET6;
			break;
		}
		default: {
			// Unknown version: af stays 0 and the packet is dropped
			// below if fragmentation is needed.
			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
			    pcb->ipsec_ifp->if_xname, ip_version);
			break;
		}
		}

		if (length > rx_pp->pp_buflet_size ||
		    (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
			// We need to fragment to send up into the netif

			u_int32_t fragment_mtu = rx_pp->pp_buflet_size;
			if (pcb->ipsec_frag_size_set &&
			    pcb->ipsec_input_frag_size < rx_pp->pp_buflet_size) {
				fragment_mtu = pcb->ipsec_input_frag_size;
			}

			mbuf_t fragment_chain = NULL;
			switch (af) {
			case AF_INET: {
				// ip_fragment expects the length in host order
				ip->ip_len = ntohs(ip->ip_len);

				// ip_fragment will modify the original data, don't free
				int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
				if (fragment_error == 0 && data != NULL) {
					fragment_chain = data;
				} else {
					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
					STATS_INC(nifs, NETIF_STATS_DROP);
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
					    pcb->ipsec_ifp->if_xname, length, fragment_error);
				}
				break;
			}
			case AF_INET6: {
				if (length < sizeof(struct ip6_hdr)) {
					mbuf_freem(data);
					STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
					STATS_INC(nifs, NETIF_STATS_DROP);
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
					    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
				} else {
					// ip6_do_fragmentation will free the original data on success only
					struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

					int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
					    ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid()));
					if (fragment_error == 0 && data != NULL) {
						fragment_chain = data;
					} else {
						mbuf_freem(data);
						STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
						STATS_INC(nifs, NETIF_STATS_DROP);
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
						    pcb->ipsec_ifp->if_xname, length, fragment_error);
					}
				}
				break;
			}
			default: {
				// Cannot fragment unknown families
				mbuf_freem(data);
				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
				STATS_INC(nifs, NETIF_STATS_DROP);
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
				    pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
				break;
			}
			}

			if (fragment_chain != NULL) {
				// Add fragments to chain before continuing
				// (they will be picked up by later loop iterations)
				lck_mtx_lock(&pcb->ipsec_input_chain_lock);
				if (pcb->ipsec_input_chain != NULL) {
					pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
				} else {
					pcb->ipsec_input_chain = fragment_chain;
				}
				pcb->ipsec_input_chain_count++;
				while (fragment_chain->m_nextpkt) {
					VERIFY(fragment_chain != fragment_chain->m_nextpkt);
					fragment_chain = fragment_chain->m_nextpkt;
					pcb->ipsec_input_chain_count++;
				}
				pcb->ipsec_input_chain_last = fragment_chain;
				lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			}

			// Make sure to free unused rx packet
			kern_pbufpool_free(rx_pp, rx_ph);

			continue;
		}

		mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);

		// Fillout rx packet
		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
		VERIFY(rx_buf != NULL);
		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
		VERIFY(rx_baddr != NULL);

		// Copy-in data from mbuf to buflet
		mbuf_copydata(data, 0, length, (void *)rx_baddr);
		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id

		// Finalize and attach the packet
		error = kern_buflet_set_data_offset(rx_buf, 0);
		VERIFY(error == 0);
		error = kern_buflet_set_data_length(rx_buf, length);
		VERIFY(error == 0);
		error = kern_packet_set_headroom(rx_ph, 0);
		VERIFY(error == 0);
		error = kern_packet_finalize(rx_ph);
		VERIFY(error == 0);
		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
		VERIFY(error == 0);

		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
		bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

		rx_ring_stats.kcrsi_slots_transferred++;
		rx_ring_stats.kcrsi_bytes_transferred += length;

		if (!pcb->ipsec_ext_ifdata_stats) {
			ifnet_stat_increment_in(pcb->ipsec_ifp, 1, length, 0);
		}

		mbuf_freem(data);

		// Advance ring
		rx_pslot = rx_slot;
		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
	}

	// Phase 2: in kpipe mode, pull encrypted packets off each kpipe TX
	// ring, decrypt them, and attach them to the remaining RX slots.
	for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
		struct kern_channel_ring_stat_increment tx_ring_stats;
		bzero(&tx_ring_stats, sizeof(tx_ring_stats));
		kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
		kern_channel_slot_t tx_pslot = NULL;
		kern_channel_slot_t tx_slot = NULL;
		if (tx_ring == NULL) {
			// Net-If TX ring not set up yet, nothing to read
			goto done;
		}


		// Unlock ipsec before entering ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		(void)kr_enter(tx_ring, TRUE);

		// Lock again after entering and validate
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
			// Ring was replaced/torn down while we were unlocked.
			goto done;
		}

		tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
		if (tx_slot == NULL) {
			// Nothing to read, don't bother signalling
			goto done;
		}

		while (rx_slot != NULL && tx_slot != NULL) {
			size_t length = 0;
			mbuf_t data = NULL;
			errno_t error = 0;
			uint32_t af;

			// Allocate rx packet
			kern_packet_t rx_ph = 0;
			error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
			if (__improbable(error != 0)) {
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
				STATS_INC(nifs, NETIF_STATS_DROP);
				break;
			}

			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

			// Advance TX ring
			tx_pslot = tx_slot;
			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

			if (tx_ph == 0) {
				// Empty TX slot: release the unused RX packet.
				kern_pbufpool_free(rx_pp, rx_ph);
				continue;
			}

			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
			VERIFY(tx_buf != NULL);
			uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
			VERIFY(tx_baddr != 0);
			tx_baddr += kern_buflet_get_data_offset(tx_buf);

			length = MIN(kern_packet_get_data_length(tx_ph),
			    pcb->ipsec_slot_size);

			// Increment TX stats
			tx_ring_stats.kcrsi_slots_transferred++;
			tx_ring_stats.kcrsi_bytes_transferred += length;

			if (length >= sizeof(struct ip)) {
				error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
				if (error == 0) {
					error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
					if (error == 0) {
						// Serialize decryption across rings.
						lck_mtx_lock(&pcb->ipsec_kpipe_decrypt_lock);
						struct ip *ip = mtod(data, struct ip *);
						u_int ip_version = ip->ip_v;
						switch (ip_version) {
						case 4: {
							af = AF_INET;
							// esp4_input_extended expects host order and
							// ip_len excluding the IP header itself.
							ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip);
							ip->ip_off = ntohs(ip->ip_off);

							if (length < ip->ip_len) {
								os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
								    pcb->ipsec_ifp->if_xname, length, ip->ip_len);
								STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
								STATS_INC(nifs, NETIF_STATS_DROP);
								mbuf_freem(data);
								data = NULL;
							} else {
								data = esp4_input_extended(data, sizeof(struct ip), pcb->ipsec_ifp);
							}
							break;
						}
						case 6: {
							if (length < sizeof(struct ip6_hdr)) {
								os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
								    pcb->ipsec_ifp->if_xname, length);
								STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
								STATS_INC(nifs, NETIF_STATS_DROP);
								mbuf_freem(data);
								data = NULL;
							} else {
								af = AF_INET6;
								struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
								const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
								if (length < ip6_len) {
									os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
									    pcb->ipsec_ifp->if_xname, length, ip6_len);
									STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
									STATS_INC(nifs, NETIF_STATS_DROP);
									mbuf_freem(data);
									data = NULL;
								} else {
									int offset = sizeof(struct ip6_hdr);
									esp6_input_extended(&data, &offset, ip6->ip6_nxt, pcb->ipsec_ifp);
								}
							}
							break;
						}
						default: {
							os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown ip version %u\n",
							    pcb->ipsec_ifp->if_xname, ip_version);
							STATS_INC(nifs, NETIF_STATS_DROP);
							mbuf_freem(data);
							data = NULL;
							break;
						}
						}
						lck_mtx_unlock(&pcb->ipsec_kpipe_decrypt_lock);
					} else {
						os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
						STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
						STATS_INC(nifs, NETIF_STATS_DROP);
						mbuf_freem(data);
						data = NULL;
					}
				} else {
					os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
					STATS_INC(nifs, NETIF_STATS_DROP);
				}
			} else {
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb->ipsec_ifp->if_xname, length);
				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}

			if (data == NULL) {
				// Decryption failed or packet was dropped above.
				kern_pbufpool_free(rx_pp, rx_ph);
				continue;
			}

			length = mbuf_pkthdr_len(data);
			if (length > rx_pp->pp_buflet_size) {
				// Flush data
				mbuf_freem(data);
				kern_pbufpool_free(rx_pp, rx_ph);
				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
				STATS_INC(nifs, NETIF_STATS_DROP);
				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
				    pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
				continue;
			}

			mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);

			// Fillout rx packet
			kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
			VERIFY(rx_buf != NULL);
			void *rx_baddr = kern_buflet_get_data_address(rx_buf);
			VERIFY(rx_baddr != NULL);

			// Copy-in data from mbuf to buflet
			mbuf_copydata(data, 0, length, (void *)rx_baddr);
			kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id

			// Finalize and attach the packet
			error = kern_buflet_set_data_offset(rx_buf, 0);
			VERIFY(error == 0);
			error = kern_buflet_set_data_length(rx_buf, length);
			VERIFY(error == 0);
			error = kern_packet_set_link_header_offset(rx_ph, 0);
			VERIFY(error == 0);
			error = kern_packet_set_network_header_offset(rx_ph, 0);
			VERIFY(error == 0);
			error = kern_packet_finalize(rx_ph);
			VERIFY(error == 0);
			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
			VERIFY(error == 0);

			STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
			STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
			bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

			rx_ring_stats.kcrsi_slots_transferred++;
			rx_ring_stats.kcrsi_bytes_transferred += length;

			if (!pcb->ipsec_ext_ifdata_stats) {
				ifnet_stat_increment_in(pcb->ipsec_ifp, 1, length, 0);
			}

			mbuf_freem(data);

			rx_pslot = rx_slot;
			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
		}

done:
		// Per-ring cleanup: advance consumed TX slots and account stats.
		if (tx_pslot) {
			kern_channel_advance_slot(tx_ring, tx_pslot);
			kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
			(void)kern_channel_reclaim(tx_ring);
		}

		// Unlock first, then exit ring
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		if (tx_ring != NULL) {
			if (tx_pslot != NULL) {
				kern_channel_notify(tx_ring, 0);
			}
			kr_exit(tx_ring);
		}

		// Re-take the pcb lock for the next iteration / final cleanup.
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
	}

	if (rx_pslot) {
		// Publish the filled RX slots to the ring.
		kern_channel_advance_slot(rx_ring, rx_pslot);
		kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
	}


	lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

	ipsec_data_move_end(pcb);
	return 0;
}
1843
/*
 * Create the netif nexus provider and instance backing an ipsec
 * interface: allocate the packet buffer pool shared by the TX/RX rings,
 * register the provider with the shared nexus controller, and attach
 * the ifnet.  Returns 0 on success; on failure, partially created
 * state (pbufpool, provider registration) is torn down.
 */
static errno_t
ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
    struct ifnet_init_eparams *init_params,
    struct ifnet **ifp)
{
	errno_t err;
	nexus_controller_t controller = kern_nexus_shared_controller();
	struct kern_nexus_net_init net_init;
	struct kern_pbufpool_init pp_init;

	// Provider name is derived from the interface name, e.g. "com.apple.netif.ipsec0".
	nexus_name_t provider_name;
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.netif.%s", pcb->ipsec_if_xname);

	// Callback table wiring this pcb's netif data path into the nexus.
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_netif_ring_init,
		.nxpi_ring_fini = ipsec_netif_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_netif_sync_tx,
		.nxpi_sync_rx = ipsec_netif_sync_rx,
		.nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
	};

	nexus_attr_t nxa = NULL;
	err = kern_nexus_attr_create(&nxa);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __func__, err);
		goto failed;
	}

	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for netif nexus to limit memory usage
	uint64_t ring_size = pcb->ipsec_netif_ring_size;
	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(err == 0);
	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(err == 0);

	assert(err == 0);

	if (ipsec_in_wmm_mode(pcb)) {
		// WMM mode: multiple driver-managed TX rings, one per access
		// category, with the WMM queue mapping.
		os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n",
		    __func__, pcb->ipsec_if_xname);

		init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED;

		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_RINGS,
		    IPSEC_NETIF_WMM_TX_RING_COUNT);
		VERIFY(err == 0);
		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_RINGS,
		    IPSEC_NETIF_WMM_RX_RING_COUNT);
		VERIFY(err == 0);

		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_QMAP, NEXUS_QMAP_TYPE_WMM);
		VERIFY(err == 0);
	}

	pcb->ipsec_netif_txring_size = ring_size;

	bzero(&pp_init, sizeof(pp_init));
	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
	// Note: we need more packets than can be held in the tx and rx rings because
	// packets can also be in the AQM queue(s)
	pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1);
	pp_init.kbi_bufsize = pcb->ipsec_slot_size;
	pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
	pp_init.kbi_max_frags = 1;
	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
	    "%s", provider_name);
	pp_init.kbi_ctx = NULL;
	pp_init.kbi_ctx_retain = NULL;
	pp_init.kbi_ctx_release = NULL;

	err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
		goto failed;
	}

	err = kern_nexus_controller_register_provider(controller,
	    ipsec_nx_dom_prov,
	    provider_name,
	    &prov_init,
	    sizeof(prov_init),
	    nxa,
	    &pcb->ipsec_nx.if_provider);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
		    __func__, err);
		goto failed;
	}

	// Instantiate the provider; the same pbufpool serves both directions.
	bzero(&net_init, sizeof(net_init));
	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
	net_init.nxneti_flags = 0;
	net_init.nxneti_eparams = init_params;
	net_init.nxneti_lladdr = NULL;
	net_init.nxneti_prepare = ipsec_netif_prepare;
	net_init.nxneti_rx_pbufpool = pcb->ipsec_netif_pp;
	net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
	err = kern_nexus_controller_alloc_net_provider_instance(controller,
	    pcb->ipsec_nx.if_provider,
	    pcb,
	    &pcb->ipsec_nx.if_instance,
	    &net_init,
	    ifp);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
		    __func__, err);
		kern_nexus_controller_deregister_provider(controller,
		    pcb->ipsec_nx.if_provider);
		uuid_clear(pcb->ipsec_nx.if_provider);
		goto failed;
	}

failed:
	// Reached on success as well (fallthrough): the attr object is always
	// released here; the pbufpool is destroyed only when err != 0.
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}
	if (err && pcb->ipsec_netif_pp != NULL) {
		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
		pcb->ipsec_netif_pp = NULL;
	}
	return err;
}
1984
1985static void
1986ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
1987{
1988 nexus_controller_t controller = kern_nexus_shared_controller();
1989 errno_t err;
1990
1991 if (!uuid_is_null(instance)) {
1992 err = kern_nexus_controller_free_provider_instance(controller,
1993 instance);
1994 if (err != 0) {
1995 os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
1996 __func__, err);
1997 }
1998 uuid_clear(instance);
1999 }
2000 if (!uuid_is_null(provider)) {
2001 err = kern_nexus_controller_deregister_provider(controller,
2002 provider);
2003 if (err != 0) {
2004 os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
2005 }
2006 uuid_clear(provider);
2007 }
2008 return;
2009}
2010
/*
 * Tear down all nexus state attached to an ipsec pcb: detach the
 * flowswitch from both its host and device ports, free the netif and
 * flowswitch providers/instances, destroy the netif packet pool, and
 * finally zero the per-pcb nexus bookkeeping so a later detach is a
 * no-op.
 */
static void
ipsec_nexus_detach(struct ipsec_pcb *pcb)
{
	ipsec_nx_t nx = &pcb->ipsec_nx;
	nexus_controller_t controller = kern_nexus_shared_controller();
	errno_t err;

	/* Detach the flowswitch from the host port, if attached */
	if (!uuid_is_null(nx->fsw_host)) {
		err = kern_nexus_ifdetach(controller,
		    nx->fsw_instance,
		    nx->fsw_host);
		if (err != 0) {
			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms host failed %d\n",
			    __func__, err);
		}
	}

	/* Detach the flowswitch from the device port, if attached */
	if (!uuid_is_null(nx->fsw_device)) {
		err = kern_nexus_ifdetach(controller,
		    nx->fsw_instance,
		    nx->fsw_device);
		if (err != 0) {
			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
			    __func__, err);
		}
	}

	/* Free the netif instance/provider, then the flowswitch's */
	ipsec_detach_provider_and_instance(nx->if_provider,
	    nx->if_instance);
	ipsec_detach_provider_and_instance(nx->fsw_provider,
	    nx->fsw_instance);

	if (pcb->ipsec_netif_pp != NULL) {
		kern_pbufpool_destroy(pcb->ipsec_netif_pp);
		pcb->ipsec_netif_pp = NULL;
	}
	/* Scrub all saved UUIDs so repeated detach calls are harmless */
	memset(nx, 0, sizeof(*nx));
}
2049
/*
 * Register a flowswitch nexus provider named
 * "com.apple.<type_name>.<ifname>" and allocate one instance of it.
 * On success, *provider and *instance receive the new UUIDs. On
 * failure an errno is returned and *provider is cleared if the
 * instance allocation was the step that failed.
 */
static errno_t
ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
    const char *type_name,
    const char *ifname,
    uuid_t *provider, uuid_t *instance)
{
	nexus_attr_t attr = NULL;
	nexus_controller_t controller = kern_nexus_shared_controller();
	uuid_t dom_prov;
	errno_t err;
	struct kern_nexus_init init;
	nexus_name_t provider_name;

	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
	    &dom_prov);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __func__, err);
		goto failed;
	}

	/* Buffer size matches the per-pcb slot size used by the netif */
	uint64_t slot_buffer_size = pcb->ipsec_slot_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(err == 0);

	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
	uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
	VERIFY(err == 0);
	uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
	VERIFY(err == 0);
	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 * This allows flowswitch to perform intra-stack packet aggregation.
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    sk_fsw_rx_agg_tcp ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, ifname);
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	/* The attributes were copied by registration; free ours now */
	kern_nexus_attr_destroy(attr);
	attr = NULL;
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL,
	    instance, &init);
	IPSEC_IF_VERIFY(err == 0);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		/* Undo the registration so the caller sees no partial state */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		uuid_clear(*provider);
	}
failed:
	return err;
}
2133
/*
 * Create a flowswitch nexus for the pcb's interface and attach it to
 * both the netif's device and host ports, then cache the flowswitch's
 * agent UUID on the pcb. On any failure, all nexus state is torn down
 * and the interface is detached; a failed ifnet_detach at that point
 * panics because the interface would otherwise be leaked in an
 * inconsistent state.
 */
static errno_t
ipsec_flowswitch_attach(struct ipsec_pcb *pcb)
{
	nexus_controller_t controller = kern_nexus_shared_controller();
	errno_t err = 0;
	ipsec_nx_t nx = &pcb->ipsec_nx;

	// Allocate flowswitch
	err = ipsec_create_fs_provider_and_instance(pcb,
	    "flowswitch",
	    pcb->ipsec_ifp->if_xname,
	    &nx->fsw_provider,
	    &nx->fsw_instance);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
		    __func__);
		goto failed;
	}

	// Attach flowswitch to device port
	err = kern_nexus_ifattach(controller, nx->fsw_instance,
	    NULL, nx->if_instance,
	    FALSE, &nx->fsw_device);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
		goto failed;
	}

	// Attach flowswitch to host port
	err = kern_nexus_ifattach(controller, nx->fsw_instance,
	    NULL, nx->if_instance,
	    TRUE, &nx->fsw_host);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms host %d\n", __func__, err);
		goto failed;
	}

	// Extract the agent UUID and save for later
	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
	if (flowswitch_nx != NULL) {
		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
		if (flowswitch != NULL) {
			/* Read the agent UUID under the flowswitch read lock */
			FSW_RLOCK(flowswitch);
			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
			FSW_UNLOCK(flowswitch);
		} else {
			os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n");
		}
		nx_release(flowswitch_nx);
	} else {
		os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n");
	}

	return 0;

failed:
	ipsec_nexus_detach(pcb);

	errno_t detach_error = 0;
	if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
		panic("ipsec_flowswitch_attach - ifnet_detach failed: %d\n", detach_error);
		/* NOT REACHED */
	}

	return err;
}
2200
2201#pragma mark Kernel Pipe Nexus
2202
/*
 * Register the shared, refcounted kernel-pipe nexus controller and its
 * "com.apple.nexus.ipsec.kpipe" provider. The first caller creates
 * everything; subsequent callers only bump the refcount. Ring sizes
 * prefer the pcb's kpipe-specific sizes, then the netif size, then the
 * global default. Balanced by ipsec_unregister_kernel_pipe_nexus().
 */
static errno_t
ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb)
{
	nexus_attr_t nxa = NULL;
	errno_t result;

	lck_mtx_lock(&ipsec_lock);
	if (ipsec_ncd_refcount++) {
		/* Already registered by an earlier caller */
		lck_mtx_unlock(&ipsec_lock);
		return 0;
	}

	result = kern_nexus_controller_create(&ipsec_ncd);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uuid_t dom_prov;
	result = kern_nexus_get_default_domain_provider(
		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	/* Callbacks connecting the kpipe rings to the ipsec data path */
	struct kern_nexus_provider_init prov_init = {
		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
		.nxpi_pre_connect = ipsec_nexus_pre_connect,
		.nxpi_connected = ipsec_nexus_connected,
		.nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
		.nxpi_disconnected = ipsec_nexus_disconnected,
		.nxpi_ring_init = ipsec_kpipe_ring_init,
		.nxpi_ring_fini = ipsec_kpipe_ring_fini,
		.nxpi_slot_init = NULL,
		.nxpi_slot_fini = NULL,
		.nxpi_sync_tx = ipsec_kpipe_sync_tx,
		.nxpi_sync_rx = ipsec_kpipe_sync_rx,
		.nxpi_tx_doorbell = NULL,
	};

	result = kern_nexus_attr_create(&nxa);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

	uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
	VERIFY(result == 0);

	// Reset ring size for kernel pipe nexus to limit memory usage
	// Note: It's better to have less on slots on the kpipe TX ring than the netif
	// so back pressure is applied at the AQM layer
	uint64_t ring_size =
	    pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
	VERIFY(result == 0);

	ring_size =
	    pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size :
	    pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
	    if_ipsec_ring_size;
	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
	VERIFY(result == 0);

	result = kern_nexus_controller_register_provider(ipsec_ncd,
	    dom_prov,
	    (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
	    &prov_init,
	    sizeof(prov_init),
	    nxa,
	    &ipsec_kpipe_uuid);
	if (result) {
		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
		    __FUNCTION__, result);
		goto done;
	}

done:
	if (nxa) {
		kern_nexus_attr_destroy(nxa);
	}

	/* On any failure, destroy the controller and reset the refcount */
	if (result) {
		if (ipsec_ncd) {
			kern_nexus_controller_destroy(ipsec_ncd);
			ipsec_ncd = NULL;
		}
		ipsec_ncd_refcount = 0;
	}

	lck_mtx_unlock(&ipsec_lock);

	return result;
}
2305
2306static void
2307ipsec_unregister_kernel_pipe_nexus(void)
2308{
2309 lck_mtx_lock(&ipsec_lock);
2310
2311 VERIFY(ipsec_ncd_refcount > 0);
2312
2313 if (--ipsec_ncd_refcount == 0) {
2314 kern_nexus_controller_destroy(ipsec_ncd);
2315 ipsec_ncd = NULL;
2316 }
2317
2318 lck_mtx_unlock(&ipsec_lock);
2319}
2320
/* This structure only holds onto kpipe channels that need to be
 * freed in the future, but are cleared from the pcb under lock
 */
struct ipsec_detached_channels {
	int count;                              /* number of valid entries in uuids[] */
	kern_pbufpool_t pp;                     /* kpipe packet pool handed over from the pcb */
	uuid_t uuids[IPSEC_IF_MAX_RING_COUNT];  /* detached kpipe instance UUIDs */
};
2329
/*
 * Transfer ownership of a pcb's kpipe channels (and their packet pool)
 * into *dc so they can be freed later, outside the pcb lock, by
 * ipsec_free_channels(). Requires the pcb lock held exclusively.
 * If no channels were ever allocated, dc->count is set to 0.
 */
static void
ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc)
{
	LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);

	if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
		/* Nothing allocated; every UUID slot must be empty */
		for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) {
			VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		}
		dc->count = 0;
		return;
	}

	dc->count = pcb->ipsec_kpipe_count;

	VERIFY(dc->count >= 0);
	VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT);

	/* Move each channel UUID out of the pcb and clear it there */
	for (int i = 0; i < dc->count; i++) {
		VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
		uuid_copy(dc->uuids[i], pcb->ipsec_kpipe_uuid[i]);
		uuid_clear(pcb->ipsec_kpipe_uuid[i]);
	}
	/* Slots beyond the configured count must never have been used */
	for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) {
		VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
	}

	/* The packet pool exists exactly when at least one channel does */
	if (dc->count) {
		VERIFY(pcb->ipsec_kpipe_pp);
	} else {
		VERIFY(!pcb->ipsec_kpipe_pp);
	}

	dc->pp = pcb->ipsec_kpipe_pp;

	pcb->ipsec_kpipe_pp = NULL;

	ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
}
2369
2370static void
2371ipsec_free_channels(struct ipsec_detached_channels *dc)
2372{
2373 if (!dc->count) {
2374 return;
2375 }
2376
2377 for (int i = 0; i < dc->count; i++) {
2378 errno_t result;
2379 result = kern_nexus_controller_free_provider_instance(ipsec_ncd, dc->uuids[i]);
2380 VERIFY(!result);
2381 }
2382
2383 VERIFY(dc->pp);
2384 kern_pbufpool_destroy(dc->pp);
2385
2386 ipsec_unregister_kernel_pipe_nexus();
2387
2388 memset(dc, 0, sizeof(*dc));
2389}
2390
2391static errno_t
2392ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
2393{
2394 struct kern_nexus_init init;
2395 struct kern_pbufpool_init pp_init;
2396 errno_t result;
2397
2398 kauth_cred_t cred = kauth_cred_get();
2399 result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
2400 if (result) {
2401 return result;
2402 }
2403
2404 VERIFY(pcb->ipsec_kpipe_count);
2405 VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED));
2406
2407 result = ipsec_register_kernel_pipe_nexus(pcb);
2408
2409 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2410
2411 if (result) {
2412 os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n",
2413 __func__, pcb->ipsec_if_xname);
2414 goto done;
2415 }
2416
2417 VERIFY(ipsec_ncd);
2418
2419 bzero(&pp_init, sizeof(pp_init));
2420 pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
2421 pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
2422 // Note: We only needs are many packets as can be held in the tx and rx rings
2423 pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count;
2424 pp_init.kbi_bufsize = pcb->ipsec_slot_size;
2425 pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
2426 pp_init.kbi_max_frags = 1;
2427 pp_init.kbi_flags |= KBIF_QUANTUM;
2428 (void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
2429 "com.apple.kpipe.%s", pcb->ipsec_if_xname);
2430 pp_init.kbi_ctx = NULL;
2431 pp_init.kbi_ctx_retain = NULL;
2432 pp_init.kbi_ctx_release = NULL;
2433
2434 result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp,
2435 NULL);
2436 if (result != 0) {
2437 os_log_error(OS_LOG_DEFAULT, "%s: %s pbufbool create failed, error %d\n",
2438 __func__, pcb->ipsec_if_xname, result);
2439 goto done;
2440 }
2441
2442 bzero(&init, sizeof(init));
2443 init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
2444 init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;
2445
2446 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
2447 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2448 result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
2449 ipsec_kpipe_uuid, pcb, &pcb->ipsec_kpipe_uuid[i], &init);
2450
2451 if (result == 0) {
2452 nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
2453 pid_t pid = pcb->ipsec_kpipe_pid;
2454 if (!pid) {
2455 pid = proc_pid(proc);
2456 }
2457 result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
2458 pcb->ipsec_kpipe_uuid[i], &port,
2459 pid, NULL, NULL, 0, NEXUS_BIND_PID);
2460 }
2461
2462 if (result) {
2463 /* Unwind all of them on error */
2464 for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) {
2465 if (!uuid_is_null(pcb->ipsec_kpipe_uuid[j])) {
2466 kern_nexus_controller_free_provider_instance(ipsec_ncd,
2467 pcb->ipsec_kpipe_uuid[j]);
2468 uuid_clear(pcb->ipsec_kpipe_uuid[j]);
2469 }
2470 }
2471 goto done;
2472 }
2473 }
2474
2475done:
2476 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2477
2478 if (result) {
2479 if (pcb->ipsec_kpipe_pp != NULL) {
2480 kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
2481 pcb->ipsec_kpipe_pp = NULL;
2482 }
2483 ipsec_unregister_kernel_pipe_nexus();
2484 } else {
2485 ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
2486 }
2487
2488 return result;
2489}
2490
2491#endif // IPSEC_NEXUS
2492
2493
2494/* Kernel control functions */
2495
2496static inline int
2497ipsec_find_by_unit(u_int32_t unit)
2498{
2499 struct ipsec_pcb *next_pcb = NULL;
2500 int found = 0;
2501
2502 TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
2503 if (next_pcb->ipsec_unit == unit) {
2504 found = 1;
2505 break;
2506 }
2507 }
2508
2509 return found;
2510}
2511
/*
 * Free an ipsec pcb: drain any queued input mbufs, destroy its locks,
 * unlink it from the global pcb list (taking ipsec_lock unless the
 * caller indicates it already holds it), and return it to the zone.
 */
static inline void
ipsec_free_pcb(struct ipsec_pcb *pcb, bool locked)
{
#if IPSEC_NEXUS
	/* Release any mbufs still parked on the input chain */
	mbuf_freem_list(pcb->ipsec_input_chain);
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_destroy(&pcb->ipsec_input_chain_lock, ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_encrypt_lock, ipsec_lck_grp);
	lck_mtx_destroy(&pcb->ipsec_kpipe_decrypt_lock, ipsec_lck_grp);
#endif // IPSEC_NEXUS
	lck_mtx_destroy(&pcb->ipsec_pcb_data_move_lock, ipsec_lck_grp);
	lck_rw_destroy(&pcb->ipsec_pcb_lock, ipsec_lck_grp);
	/* Take ipsec_lock only if the caller doesn't already hold it */
	if (!locked) {
		lck_mtx_lock(&ipsec_lock);
	}
	TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
	if (!locked) {
		lck_mtx_unlock(&ipsec_lock);
	}
	zfree(ipsec_pcb_zone, pcb);
}
2533
/*
 * Allocate an ipsec pcb for a new kernel-control unit. Picks the next
 * free control unit if *unit is 0 (or validates the requested one),
 * assigns a unique interface id — handling wraparound by scanning the
 * sorted list for the first gap — and inserts the pcb into the global
 * list in id order. Returns EINVAL on bad arguments, EBUSY if the
 * unit is taken or the unit space is exhausted.
 */
static errno_t
ipsec_ctl_setup(u_int32_t *unit, void **unitinfo)
{
	if (unit == NULL || unitinfo == NULL) {
		return EINVAL;
	}

	lck_mtx_lock(&ipsec_lock);

	/* Find next available unit */
	if (*unit == 0) {
		*unit = 1;
		while (*unit != ctl_maxunit) {
			if (ipsec_find_by_unit(*unit)) {
				(*unit)++;
			} else {
				break;
			}
		}
		if (*unit == ctl_maxunit) {
			lck_mtx_unlock(&ipsec_lock);
			return EBUSY;
		}
	} else if (ipsec_find_by_unit(*unit)) {
		lck_mtx_unlock(&ipsec_lock);
		return EBUSY;
	}

	/* Find some open interface id */
	u_int32_t chosen_unique_id = 1;
	struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
	if (next_pcb != NULL) {
		/* List was not empty, add one to the last item */
		chosen_unique_id = next_pcb->ipsec_unique_id + 1;
		next_pcb = NULL;

		/*
		 * If this wrapped the id number, start looking at
		 * the front of the list for an unused id.
		 */
		if (chosen_unique_id == 0) {
			/* Find the next unused ID */
			chosen_unique_id = 1;
			TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
				if (next_pcb->ipsec_unique_id > chosen_unique_id) {
					/* We found a gap */
					break;
				}

				chosen_unique_id = next_pcb->ipsec_unique_id + 1;
			}
		}
	}

	struct ipsec_pcb *pcb = zalloc_flags(ipsec_pcb_zone, Z_WAITOK | Z_ZERO);

	*unitinfo = pcb;
	pcb->ipsec_unit = *unit;
	pcb->ipsec_unique_id = chosen_unique_id;

	/* Insert before the gap, or at the tail to keep ids ordered */
	if (next_pcb != NULL) {
		TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
	} else {
		TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
	}

	lck_mtx_unlock(&ipsec_lock);

	return 0;
}
2604
/*
 * Kernel control bind handler. Creates the pcb via ipsec_ctl_setup()
 * if the caller hasn't already, then records the control reference,
 * applies default tunables (slot/ring sizes, output service class),
 * and initializes the pcb's locks. Returns EINVAL if no pcb could be
 * obtained.
 */
static errno_t
ipsec_ctl_bind(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	if (*unitinfo == NULL) {
		u_int32_t unit = 0;
		(void)ipsec_ctl_setup(&unit, unitinfo);
	}

	struct ipsec_pcb *pcb = (struct ipsec_pcb *)*unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Setup the protocol control block */
	pcb->ipsec_ctlref = kctlref;
	pcb->ipsec_unit = sac->sc_unit;
	pcb->ipsec_output_service_class = MBUF_SC_OAM;

#if IPSEC_NEXUS
	/* Default data-path tunables; may be overridden via setsockopt */
	pcb->ipsec_use_netif = false;
	pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	pcb->ipsec_netif_ring_size = if_ipsec_ring_size;
	pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size;
	pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size;
#endif // IPSEC_NEXUS

	lck_rw_init(&pcb->ipsec_pcb_lock, ipsec_lck_grp, ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_pcb_data_move_lock, ipsec_lck_grp, ipsec_lck_attr);
#if IPSEC_NEXUS
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_init(&pcb->ipsec_input_chain_lock, ipsec_lck_grp, ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_encrypt_lock, ipsec_lck_grp, ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_decrypt_lock, ipsec_lck_grp, ipsec_lck_attr);
#endif // IPSEC_NEXUS

	return 0;
}
2644
/*
 * Kernel control connect handler: creates and attaches the ipsec
 * network interface for this control unit. Two attach paths exist:
 * the Skywalk-native path (netif + optional kpipe channels + a
 * flowswitch) when ipsec_use_netif is set, and the classic ifnet path
 * otherwise. On success the interface is marked IFF_RUNNING; on
 * failure the pcb is freed (except after flowswitch-attach failure,
 * where teardown completes asynchronously via ipsec_detached()).
 */
static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	struct ifnet_init_eparams ipsec_init = {};
	errno_t result = 0;

	if (*unitinfo == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	struct ipsec_pcb *pcb = *unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Handle case where ipsec_ctl_setup() was called, but ipsec_ctl_bind() was not */
	if (pcb->ipsec_ctlref == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	/* Interface names are zero-based; control units are one-based */
	snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
	snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
	os_log(OS_LOG_DEFAULT, "ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);

	/* Create the interface */
	bzero(&ipsec_init, sizeof(ipsec_init));
	ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
	ipsec_init.len = sizeof(ipsec_init);

#if IPSEC_NEXUS
	if (pcb->ipsec_use_netif) {
		ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
	} else
#endif // IPSEC_NEXUS
	{
		ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
		ipsec_init.start = ipsec_start;
	}
	ipsec_init.name = "ipsec";
	ipsec_init.unit = pcb->ipsec_unit - 1;
	ipsec_init.uniqueid = pcb->ipsec_unique_name;
	ipsec_init.uniqueid_len = strlen(pcb->ipsec_unique_name);
	ipsec_init.family = IFNET_FAMILY_IPSEC;
	ipsec_init.type = IFT_OTHER;
	ipsec_init.demux = ipsec_demux;
	ipsec_init.add_proto = ipsec_add_proto;
	ipsec_init.del_proto = ipsec_del_proto;
	ipsec_init.softc = pcb;
	ipsec_init.ioctl = ipsec_ioctl;
	ipsec_init.free = ipsec_detached;

#if IPSEC_NEXUS
	/* We don't support kpipes without a netif */
	if (pcb->ipsec_kpipe_count && !pcb->ipsec_use_netif) {
		result = ENOTSUP;
		os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - kpipe requires netif: failed %d\n", result);
		ipsec_free_pcb(pcb, false);
		*unitinfo = NULL;
		return result;
	}

	if (if_ipsec_debug != 0) {
		printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u "
		    "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n",
		    __func__,
		    ipsec_init.name, ipsec_init.unit,
		    pcb->ipsec_use_netif,
		    pcb->ipsec_kpipe_count,
		    pcb->ipsec_slot_size,
		    pcb->ipsec_netif_ring_size,
		    pcb->ipsec_kpipe_tx_ring_size,
		    pcb->ipsec_kpipe_rx_ring_size);
	}
	if (pcb->ipsec_use_netif) {
		/* Skywalk-native path: kpipes (optional), netif, flowswitch */
		if (pcb->ipsec_kpipe_count) {
			result = ipsec_enable_channel(pcb, current_proc());
			if (result) {
				os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n",
				    __func__, pcb->ipsec_if_xname);
				ipsec_free_pcb(pcb, false);
				*unitinfo = NULL;
				return result;
			}
		}

		result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		result = ipsec_flowswitch_attach(pcb);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n", result);
			// Do not call ipsec_free_pcb(). We will be attached already, and will be freed later
			// in ipsec_detached().
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
	} else
#endif // IPSEC_NEXUS
	{
		/* Classic path: allocate and attach a plain ifnet */
		result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}
		ipsec_ifnet_set_attrs(pcb->ipsec_ifp);

		/* Attach the interface */
		result = ifnet_attach(pcb->ipsec_ifp, NULL);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
			ifnet_release(pcb->ipsec_ifp);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
	}

#if IPSEC_NEXUS
	/*
	 * Mark the data path as ready.
	 * If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected.
	 */
	if (pcb->ipsec_kpipe_count == 0) {
		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
		IPSEC_SET_DATA_PATH_READY(pcb);
		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
	}
#endif

	/* The interface's resources are allocated; mark it as running */
	ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);

	return 0;
}
2794
2795static errno_t
2796ipsec_detach_ip(ifnet_t interface,
2797 protocol_family_t protocol,
2798 socket_t pf_socket)
2799{
2800 errno_t result = EPROTONOSUPPORT;
2801
2802 /* Attempt a detach */
2803 if (protocol == PF_INET) {
2804 struct ifreq ifr;
2805
2806 bzero(&ifr, sizeof(ifr));
2807 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2808 ifnet_name(interface), ifnet_unit(interface));
2809
2810 result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
2811 } else if (protocol == PF_INET6) {
2812 struct in6_ifreq ifr6;
2813
2814 bzero(&ifr6, sizeof(ifr6));
2815 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2816 ifnet_name(interface), ifnet_unit(interface));
2817
2818 result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2819 }
2820
2821 return result;
2822}
2823
2824static void
2825ipsec_remove_address(ifnet_t interface,
2826 protocol_family_t protocol,
2827 ifaddr_t address,
2828 socket_t pf_socket)
2829{
2830 errno_t result = 0;
2831
2832 /* Attempt a detach */
2833 if (protocol == PF_INET) {
2834 struct ifreq ifr;
2835
2836 bzero(&ifr, sizeof(ifr));
2837 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2838 ifnet_name(interface), ifnet_unit(interface));
2839 result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2840 if (result != 0) {
2841 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed: %d", result);
2842 } else {
2843 result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2844 if (result != 0) {
2845 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR failed: %d", result);
2846 }
2847 }
2848 } else if (protocol == PF_INET6) {
2849 struct in6_ifreq ifr6;
2850
2851 bzero(&ifr6, sizeof(ifr6));
2852 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2853 ifnet_name(interface), ifnet_unit(interface));
2854 result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
2855 sizeof(ifr6.ifr_addr));
2856 if (result != 0) {
2857 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed (v6): %d",
2858 result);
2859 } else {
2860 result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2861 if (result != 0) {
2862 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
2863 result);
2864 }
2865 }
2866 }
2867}
2868
2869static void
2870ipsec_cleanup_family(ifnet_t interface,
2871 protocol_family_t protocol)
2872{
2873 errno_t result = 0;
2874 socket_t pf_socket = NULL;
2875 ifaddr_t *addresses = NULL;
2876 int i;
2877
2878 if (protocol != PF_INET && protocol != PF_INET6) {
2879 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - invalid protocol family %d\n", protocol);
2880 return;
2881 }
2882
2883 /* Create a socket for removing addresses and detaching the protocol */
2884 result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2885 if (result != 0) {
2886 if (result != EAFNOSUPPORT) {
2887 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - failed to create %s socket: %d\n",
2888 protocol == PF_INET ? "IP" : "IPv6", result);
2889 }
2890 goto cleanup;
2891 }
2892
2893 /* always set SS_PRIV, we want to close and detach regardless */
2894 sock_setpriv(pf_socket, 1);
2895
2896 result = ipsec_detach_ip(interface, protocol, pf_socket);
2897 if (result == 0 || result == ENXIO) {
2898 /* We are done! We either detached or weren't attached. */
2899 goto cleanup;
2900 } else if (result != EBUSY) {
2901 /* Uh, not really sure what happened here... */
2902 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2903 goto cleanup;
2904 }
2905
2906 /*
2907 * At this point, we received an EBUSY error. This means there are
2908 * addresses attached. We should detach them and then try again.
2909 */
2910 result = ifnet_get_address_list_family(interface, &addresses, protocol);
2911 if (result != 0) {
2912 os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2913 ifnet_name(interface), ifnet_unit(interface),
2914 protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
2915 goto cleanup;
2916 }
2917
2918 for (i = 0; addresses[i] != 0; i++) {
2919 ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
2920 }
2921 ifnet_free_address_list(addresses);
2922 addresses = NULL;
2923
2924 /*
2925 * The addresses should be gone, we should try the remove again.
2926 */
2927 result = ipsec_detach_ip(interface, protocol, pf_socket);
2928 if (result != 0 && result != ENXIO) {
2929 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
2930 }
2931
2932cleanup:
2933 if (pf_socket != NULL) {
2934 sock_close(pf_socket);
2935 }
2936
2937 if (addresses != NULL) {
2938 ifnet_free_address_list(addresses);
2939 }
2940}
2941
/*
 * Kernel control disconnect handler: tears down the interface created
 * in ipsec_ctl_connect(). Waits for in-flight data-path threads to
 * drain, stops the nexus rings, detaches any kpipe channels under the
 * pcb lock, then detaches the interface. The netif path detaches
 * synchronously while holding an I/O reference; the classic path
 * detaches asynchronously (ipsec_detached() runs when the refcount
 * drops). A pcb that was bound but never connected is freed directly.
 */
static errno_t
ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo)
{
	struct ipsec_pcb *pcb = unitinfo;
	ifnet_t ifp = NULL;
	errno_t result = 0;

	if (pcb == NULL) {
		return EINVAL;
	}

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);

#if IPSEC_NEXUS
	// Tell the nexus to stop all rings
	if (pcb->ipsec_netif_nexus != NULL) {
		kern_nexus_stop(pcb->ipsec_netif_nexus);
	}
#endif // IPSEC_NEXUS

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
	if (if_ipsec_debug != 0) {
		printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n",
		    pcb->ipsec_if_xname, pcb->ipsec_unique_name);
	}

	/* Take ownership of the kpipe channels; freed after the lock drops */
	struct ipsec_detached_channels dc;
	ipsec_detach_channels(pcb, &dc);
#endif // IPSEC_NEXUS

	pcb->ipsec_ctlref = NULL;

	ifp = pcb->ipsec_ifp;
	if (ifp != NULL) {
#if IPSEC_NEXUS
		if (pcb->ipsec_netif_nexus != NULL) {
			/*
			 * Quiesce the interface and flush any pending outbound packets.
			 */
			if_down(ifp);

			/* Increment refcnt, but detach interface */
			ifnet_incr_iorefcnt(ifp);
			if ((result = ifnet_detach(ifp)) != 0) {
				panic("ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
				/* NOT REACHED */
			}

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

			ipsec_free_channels(&dc);

			ipsec_nexus_detach(pcb);

			/* Decrement refcnt to finish detaching and freeing */
			ifnet_decr_iorefcnt(ifp);
		} else
#endif // IPSEC_NEXUS
		{
			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
			ipsec_free_channels(&dc);
#endif // IPSEC_NEXUS

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			/*
			 * Detach now; ipsec_detach() will be called asynchronously once
			 * the I/O reference count drops to 0. There we will invoke
			 * ifnet_release().
			 */
			if ((result = ifnet_detach(ifp)) != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
			}
		}
	} else {
		// Bound, but not connected
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		ipsec_free_pcb(pcb, false);
	}

	return 0;
}
3051
/*
 * ipsec_ctl_send - kernel control write handler.
 *
 * Data written to the control socket is not part of the ipsec interface
 * protocol; the mbuf is freed and success is returned so senders are not
 * disturbed.
 */
static errno_t
ipsec_ctl_send(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    __unused void *unitinfo,
    mbuf_t m,
    __unused int flags)
{
	/* Receive messages from the control socket. Currently unused. */
	mbuf_freem(m);
	return 0;
}
3063
3064static errno_t
3065ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,
3066 __unused u_int32_t unit,
3067 void *unitinfo,
3068 int opt,
3069 void *data,
3070 size_t len)
3071{
3072 errno_t result = 0;
3073 struct ipsec_pcb *pcb = unitinfo;
3074 if (pcb == NULL) {
3075 return EINVAL;
3076 }
3077
3078 /* check for privileges for privileged options */
3079 switch (opt) {
3080 case IPSEC_OPT_FLAGS:
3081 case IPSEC_OPT_EXT_IFDATA_STATS:
3082 case IPSEC_OPT_SET_DELEGATE_INTERFACE:
3083 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
3084 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3085 return EPERM;
3086 }
3087 break;
3088 }
3089
3090 switch (opt) {
3091 case IPSEC_OPT_FLAGS: {
3092 if (len != sizeof(u_int32_t)) {
3093 result = EMSGSIZE;
3094 } else {
3095 pcb->ipsec_external_flags = *(u_int32_t *)data;
3096 }
3097 break;
3098 }
3099
3100 case IPSEC_OPT_EXT_IFDATA_STATS: {
3101 if (len != sizeof(int)) {
3102 result = EMSGSIZE;
3103 break;
3104 }
3105 if (pcb->ipsec_ifp == NULL) {
3106 // Only can set after connecting
3107 result = EINVAL;
3108 break;
3109 }
3110 pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
3111 break;
3112 }
3113
3114 case IPSEC_OPT_INC_IFDATA_STATS_IN:
3115 case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
3116 struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
3117
3118 if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
3119 result = EINVAL;
3120 break;
3121 }
3122 if (pcb->ipsec_ifp == NULL) {
3123 // Only can set after connecting
3124 result = EINVAL;
3125 break;
3126 }
3127 if (!pcb->ipsec_ext_ifdata_stats) {
3128 result = EINVAL;
3129 break;
3130 }
3131 if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) {
3132 ifnet_stat_increment_in(pcb->ipsec_ifp, utsp->utsp_packets,
3133 utsp->utsp_bytes, utsp->utsp_errors);
3134 } else {
3135 ifnet_stat_increment_out(pcb->ipsec_ifp, utsp->utsp_packets,
3136 utsp->utsp_bytes, utsp->utsp_errors);
3137 }
3138 break;
3139 }
3140
3141 case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
3142 ifnet_t del_ifp = NULL;
3143 char name[IFNAMSIZ];
3144
3145 if (len > IFNAMSIZ - 1) {
3146 result = EMSGSIZE;
3147 break;
3148 }
3149 if (pcb->ipsec_ifp == NULL) {
3150 // Only can set after connecting
3151 result = EINVAL;
3152 break;
3153 }
3154 if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */
3155 bcopy(data, name, len);
3156 name[len] = 0;
3157 result = ifnet_find_by_name(name, &del_ifp);
3158 }
3159 if (result == 0) {
3160 os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
3161 __func__, pcb->ipsec_ifp->if_xname,
3162 del_ifp ? del_ifp->if_xname : "NULL");
3163
3164 result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
3165 if (del_ifp) {
3166 ifnet_release(del_ifp);
3167 }
3168 }
3169 break;
3170 }
3171
3172 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3173 if (len != sizeof(int)) {
3174 result = EMSGSIZE;
3175 break;
3176 }
3177 if (pcb->ipsec_ifp == NULL) {
3178 // Only can set after connecting
3179 result = EINVAL;
3180 break;
3181 }
3182 mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
3183 if (output_service_class == MBUF_SC_UNSPEC) {
3184 pcb->ipsec_output_service_class = MBUF_SC_OAM;
3185 } else {
3186 pcb->ipsec_output_service_class = output_service_class;
3187 }
3188 os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
3189 __func__, pcb->ipsec_ifp->if_xname,
3190 pcb->ipsec_output_service_class);
3191 break;
3192 }
3193
3194#if IPSEC_NEXUS
3195 case IPSEC_OPT_ENABLE_CHANNEL: {
3196 if (len != sizeof(int)) {
3197 result = EMSGSIZE;
3198 break;
3199 }
3200 if (pcb->ipsec_ifp != NULL) {
3201 // Only can set before connecting
3202 result = EINVAL;
3203 break;
3204 }
3205 if ((*(int *)data) != 0 &&
3206 (*(int *)data) != 1 &&
3207 (*(int *)data) != IPSEC_IF_WMM_RING_COUNT) {
3208 result = EINVAL;
3209 break;
3210 }
3211 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3212 pcb->ipsec_kpipe_count = *(int *)data;
3213 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3214 break;
3215 }
3216
3217 case IPSEC_OPT_CHANNEL_BIND_PID: {
3218 if (len != sizeof(pid_t)) {
3219 result = EMSGSIZE;
3220 break;
3221 }
3222 if (pcb->ipsec_ifp != NULL) {
3223 // Only can set before connecting
3224 result = EINVAL;
3225 break;
3226 }
3227 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3228 pcb->ipsec_kpipe_pid = *(pid_t *)data;
3229 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3230 break;
3231 }
3232
3233 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3234 if (len != sizeof(int)) {
3235 result = EMSGSIZE;
3236 break;
3237 }
3238 if (pcb->ipsec_ifp == NULL) {
3239 // Only can set after connecting
3240 result = EINVAL;
3241 break;
3242 }
3243 if (!if_is_fsw_transport_netagent_enabled()) {
3244 result = ENOTSUP;
3245 break;
3246 }
3247 if (uuid_is_null(pcb->ipsec_nx.fsw_agent)) {
3248 result = ENOENT;
3249 break;
3250 }
3251
3252 uint32_t flags = netagent_get_flags(pcb->ipsec_nx.fsw_agent);
3253
3254 if (*(int *)data) {
3255 flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
3256 NETAGENT_FLAG_NEXUS_LISTENER);
3257 result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3258 pcb->ipsec_needs_netagent = true;
3259 } else {
3260 pcb->ipsec_needs_netagent = false;
3261 flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
3262 NETAGENT_FLAG_NEXUS_LISTENER);
3263 result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
3264 }
3265 break;
3266 }
3267
3268 case IPSEC_OPT_INPUT_FRAG_SIZE: {
3269 if (len != sizeof(u_int32_t)) {
3270 result = EMSGSIZE;
3271 break;
3272 }
3273 u_int32_t input_frag_size = *(u_int32_t *)data;
3274 if (input_frag_size <= sizeof(struct ip6_hdr)) {
3275 pcb->ipsec_frag_size_set = FALSE;
3276 pcb->ipsec_input_frag_size = 0;
3277 } else {
3278 pcb->ipsec_frag_size_set = TRUE;
3279 pcb->ipsec_input_frag_size = input_frag_size;
3280 }
3281 break;
3282 }
3283 case IPSEC_OPT_ENABLE_NETIF: {
3284 if (len != sizeof(int)) {
3285 result = EMSGSIZE;
3286 break;
3287 }
3288 if (pcb->ipsec_ifp != NULL) {
3289 // Only can set before connecting
3290 result = EINVAL;
3291 break;
3292 }
3293 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3294 pcb->ipsec_use_netif = !!(*(int *)data);
3295 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3296 break;
3297 }
3298 case IPSEC_OPT_SLOT_SIZE: {
3299 if (len != sizeof(u_int32_t)) {
3300 result = EMSGSIZE;
3301 break;
3302 }
3303 if (pcb->ipsec_ifp != NULL) {
3304 // Only can set before connecting
3305 result = EINVAL;
3306 break;
3307 }
3308 u_int32_t slot_size = *(u_int32_t *)data;
3309 if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
3310 slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
3311 return EINVAL;
3312 }
3313 pcb->ipsec_slot_size = slot_size;
3314 if (if_ipsec_debug != 0) {
3315 printf("%s: IPSEC_OPT_SLOT_SIZE %u\n", __func__, slot_size);
3316 }
3317 break;
3318 }
3319 case IPSEC_OPT_NETIF_RING_SIZE: {
3320 if (len != sizeof(u_int32_t)) {
3321 result = EMSGSIZE;
3322 break;
3323 }
3324 if (pcb->ipsec_ifp != NULL) {
3325 // Only can set before connecting
3326 result = EINVAL;
3327 break;
3328 }
3329 u_int32_t ring_size = *(u_int32_t *)data;
3330 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3331 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3332 return EINVAL;
3333 }
3334 pcb->ipsec_netif_ring_size = ring_size;
3335 if (if_ipsec_debug != 0) {
3336 printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n", __func__, ring_size);
3337 }
3338 break;
3339 }
3340 case IPSEC_OPT_TX_FSW_RING_SIZE: {
3341 if (len != sizeof(u_int32_t)) {
3342 result = EMSGSIZE;
3343 break;
3344 }
3345 if (pcb->ipsec_ifp != NULL) {
3346 // Only can set before connecting
3347 result = EINVAL;
3348 break;
3349 }
3350 u_int32_t ring_size = *(u_int32_t *)data;
3351 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3352 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3353 return EINVAL;
3354 }
3355 pcb->ipsec_tx_fsw_ring_size = ring_size;
3356 if (if_ipsec_debug != 0) {
3357 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3358 }
3359 break;
3360 }
3361 case IPSEC_OPT_RX_FSW_RING_SIZE: {
3362 if (len != sizeof(u_int32_t)) {
3363 result = EMSGSIZE;
3364 break;
3365 }
3366 if (pcb->ipsec_ifp != NULL) {
3367 // Only can set before connecting
3368 result = EINVAL;
3369 break;
3370 }
3371 u_int32_t ring_size = *(u_int32_t *)data;
3372 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3373 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3374 return EINVAL;
3375 }
3376 pcb->ipsec_rx_fsw_ring_size = ring_size;
3377 if (if_ipsec_debug != 0) {
3378 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3379 }
3380 break;
3381 }
3382 case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3383 if (len != sizeof(u_int32_t)) {
3384 result = EMSGSIZE;
3385 break;
3386 }
3387 if (pcb->ipsec_ifp != NULL) {
3388 // Only can set before connecting
3389 result = EINVAL;
3390 break;
3391 }
3392 u_int32_t ring_size = *(u_int32_t *)data;
3393 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3394 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3395 return EINVAL;
3396 }
3397 pcb->ipsec_kpipe_tx_ring_size = ring_size;
3398 if (if_ipsec_debug != 0) {
3399 printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n", __func__, ring_size);
3400 }
3401 break;
3402 }
3403 case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3404 if (len != sizeof(u_int32_t)) {
3405 result = EMSGSIZE;
3406 break;
3407 }
3408 if (pcb->ipsec_ifp != NULL) {
3409 // Only can set before connecting
3410 result = EINVAL;
3411 break;
3412 }
3413 u_int32_t ring_size = *(u_int32_t *)data;
3414 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3415 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3416 return EINVAL;
3417 }
3418 pcb->ipsec_kpipe_rx_ring_size = ring_size;
3419 if (if_ipsec_debug != 0) {
3420 printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n", __func__, ring_size);
3421 }
3422 break;
3423 }
3424
3425#endif // IPSEC_NEXUS
3426
3427 default: {
3428 result = ENOPROTOOPT;
3429 break;
3430 }
3431 }
3432
3433 return result;
3434}
3435
3436static errno_t
3437ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
3438 __unused u_int32_t unit,
3439 void *unitinfo,
3440 int opt,
3441 void *data,
3442 size_t *len)
3443{
3444 errno_t result = 0;
3445 struct ipsec_pcb *pcb = unitinfo;
3446 if (pcb == NULL) {
3447 return EINVAL;
3448 }
3449
3450 switch (opt) {
3451 case IPSEC_OPT_FLAGS: {
3452 if (*len != sizeof(u_int32_t)) {
3453 result = EMSGSIZE;
3454 } else {
3455 *(u_int32_t *)data = pcb->ipsec_external_flags;
3456 }
3457 break;
3458 }
3459
3460 case IPSEC_OPT_EXT_IFDATA_STATS: {
3461 if (*len != sizeof(int)) {
3462 result = EMSGSIZE;
3463 } else {
3464 *(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
3465 }
3466 break;
3467 }
3468
3469 case IPSEC_OPT_IFNAME: {
3470 if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) {
3471 result = EMSGSIZE;
3472 } else {
3473 if (pcb->ipsec_ifp == NULL) {
3474 // Only can get after connecting
3475 result = EINVAL;
3476 break;
3477 }
3478 *len = scnprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
3479 }
3480 break;
3481 }
3482
3483 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3484 if (*len != sizeof(int)) {
3485 result = EMSGSIZE;
3486 } else {
3487 *(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
3488 }
3489 break;
3490 }
3491
3492#if IPSEC_NEXUS
3493
3494 case IPSEC_OPT_ENABLE_CHANNEL: {
3495 if (*len != sizeof(int)) {
3496 result = EMSGSIZE;
3497 } else {
3498 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3499 *(int *)data = pcb->ipsec_kpipe_count;
3500 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3501 }
3502 break;
3503 }
3504
3505 case IPSEC_OPT_CHANNEL_BIND_PID: {
3506 if (*len != sizeof(pid_t)) {
3507 result = EMSGSIZE;
3508 } else {
3509 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3510 *(pid_t *)data = pcb->ipsec_kpipe_pid;
3511 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3512 }
3513 break;
3514 }
3515
3516 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3517 if (*len != sizeof(int)) {
3518 result = EMSGSIZE;
3519 } else {
3520 *(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.fsw_agent);
3521 }
3522 break;
3523 }
3524
3525 case IPSEC_OPT_ENABLE_NETIF: {
3526 if (*len != sizeof(int)) {
3527 result = EMSGSIZE;
3528 } else {
3529 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3530 *(int *)data = !!pcb->ipsec_use_netif;
3531 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3532 }
3533 break;
3534 }
3535
3536 case IPSEC_OPT_GET_CHANNEL_UUID: {
3537 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3538 if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
3539 result = ENXIO;
3540 } else if (*len != sizeof(uuid_t) * pcb->ipsec_kpipe_count) {
3541 result = EMSGSIZE;
3542 } else {
3543 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
3544 uuid_copy(((uuid_t *)data)[i], pcb->ipsec_kpipe_uuid[i]);
3545 }
3546 }
3547 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3548 break;
3549 }
3550
3551 case IPSEC_OPT_INPUT_FRAG_SIZE: {
3552 if (*len != sizeof(u_int32_t)) {
3553 result = EMSGSIZE;
3554 } else {
3555 *(u_int32_t *)data = pcb->ipsec_input_frag_size;
3556 }
3557 break;
3558 }
3559 case IPSEC_OPT_SLOT_SIZE: {
3560 if (*len != sizeof(u_int32_t)) {
3561 result = EMSGSIZE;
3562 } else {
3563 *(u_int32_t *)data = pcb->ipsec_slot_size;
3564 }
3565 break;
3566 }
3567 case IPSEC_OPT_NETIF_RING_SIZE: {
3568 if (*len != sizeof(u_int32_t)) {
3569 result = EMSGSIZE;
3570 } else {
3571 *(u_int32_t *)data = pcb->ipsec_netif_ring_size;
3572 }
3573 break;
3574 }
3575 case IPSEC_OPT_TX_FSW_RING_SIZE: {
3576 if (*len != sizeof(u_int32_t)) {
3577 result = EMSGSIZE;
3578 } else {
3579 *(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
3580 }
3581 break;
3582 }
3583 case IPSEC_OPT_RX_FSW_RING_SIZE: {
3584 if (*len != sizeof(u_int32_t)) {
3585 result = EMSGSIZE;
3586 } else {
3587 *(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
3588 }
3589 break;
3590 }
3591 case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3592 if (*len != sizeof(u_int32_t)) {
3593 result = EMSGSIZE;
3594 } else {
3595 *(u_int32_t *)data = pcb->ipsec_kpipe_tx_ring_size;
3596 }
3597 break;
3598 }
3599 case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3600 if (*len != sizeof(u_int32_t)) {
3601 result = EMSGSIZE;
3602 } else {
3603 *(u_int32_t *)data = pcb->ipsec_kpipe_rx_ring_size;
3604 }
3605 break;
3606 }
3607
3608#endif // IPSEC_NEXUS
3609
3610 default: {
3611 result = ENOPROTOOPT;
3612 break;
3613 }
3614 }
3615
3616 return result;
3617}
3618
3619/* Network Interface functions */
/*
 * ipsec_output - legacy (non-nexus) output path for the ipsec interface.
 *
 * Applies IPsec transformation to an outbound IPv4 or IPv6 packet and
 * hands the result to ip_output()/ip6_output().  On flow-control or
 * suspension advisories the interface output is disabled and ENOBUFS is
 * returned so the caller stops dequeueing.
 *
 * Ownership: `data` is consumed on every path (transmitted, absorbed by
 * the IPsec machinery, or freed at ipsec_output_err).
 */
static errno_t
ipsec_output(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	struct ipsec_output_state ipsec_state;
	struct route ro;
	struct route_in6 ro6;
	int length;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6 = NULL;
	struct ip_out_args ipoa;
	struct ip6_out_args ip6oa;
	int error = 0;
	u_int ip_version = 0;
	int flags = 0;
	struct flowadv *adv = NULL;

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = EINVAL;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	/* The IP version nibble must be readable from the first mbuf. */
	if (data->m_len < sizeof(*ip)) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IP header length: %d.\n", data->m_len);
		IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
		error = EINVAL;
		goto ipsec_output_err;
	}

	ip = mtod(data, struct ip *);
	ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
		u_int8_t ip_hlen = 0;
#ifdef _IP_VHL
		ip_hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
#else
		ip_hlen = ip->ip_hl << 2;
#endif
		if (ip_hlen < sizeof(*ip)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: Bad ip header length %d.\n", ip_hlen);
			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		/* In netif mode BPF taps are serviced on the nexus path instead. */
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Apply encryption */
		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec4_interface_output(&ipsec_state, interface);
		/* Tunneled in IPv6 - packet is gone */
		if (error == 0 && ipsec_state.tunneled == 6) {
			goto done;
		}

		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec4_output error %d.\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
		data->m_pkthdr.pkt_proto = ip->ip_p;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Flip endian-ness for ip_output */
		/* NOTE: re-fetch the header pointer; the IPsec pass may have
		 * replaced the leading mbuf. ip_output() expects ip_len/ip_off
		 * in host byte order. */
		ip = mtod(data, struct ip *);
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, length, 0);

		/* Send to ip_output */
		memset(&ro, 0, sizeof(ro));

		flags = (IP_OUTARGS | /* Passing out args to specify interface */
		    IP_NOIPSEC);      /* To ensure the packet doesn't go through ipsec twice */

		memset(&ipoa, 0, sizeof(ipoa));
		ipoa.ipoa_flowadv.code = 0;
		ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ipoa.ipoa_boundif = ipsec_state.outgoing_if;
			ipoa.ipoa_flags |= IPOAF_BOUND_IF;
		}
		ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);

		adv = &ipoa.ipoa_flowadv;

		(void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
		data = NULL;

		/* Honor flow advisory: stop the output queue until reenabled. */
		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	case 6: {
		if (data->m_len < sizeof(*ip6)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IPv6 header length: %d.\n", data->m_len);
			IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET6;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Make the IPv6 header contiguous in its own mbuf. */
		data = ipsec6_splithdr(data);
		if (data == NULL) {
			/* NOTE(review): `error` is still 0 here, so this drop
			 * path reports success to the caller — confirm intended. */
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_splithdr returned NULL\n");
			goto ipsec_output_err;
		}

		ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) { /* tunneled in IPv4 - packet is gone */
			goto done;
		}
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
		data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, length, 0);

		/* Send to ip6_output */
		memset(&ro6, 0, sizeof(ro6));

		flags = IPV6_OUTARGS;

		memset(&ip6oa, 0, sizeof(ip6oa));
		ip6oa.ip6oa_flowadv.code = 0;
		ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
			ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
		}
		ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);

		adv = &ip6oa.ip6oa_flowadv;

		(void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
		data = NULL;

		/* Honor flow advisory: stop the output queue until reenabled. */
		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	default: {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: Received unknown packet version %d.\n", ip_version);
		error = EINVAL;
		goto ipsec_output_err;
	}
	}

done:
	return error;

ipsec_output_err:
	if (data) {
		mbuf_freem(data);
	}
	goto done;
}
3835
3836static void
3837ipsec_start(ifnet_t interface)
3838{
3839 mbuf_t data;
3840 struct ipsec_pcb *pcb = ifnet_softc(interface);
3841
3842 VERIFY(pcb != NULL);
3843 for (;;) {
3844 if (ifnet_dequeue(interface, &data) != 0) {
3845 break;
3846 }
3847 if (ipsec_output(interface, data) != 0) {
3848 break;
3849 }
3850 }
3851}
3852
3853/* Network Interface functions */
3854static errno_t
3855ipsec_demux(__unused ifnet_t interface,
3856 mbuf_t data,
3857 __unused char *frame_header,
3858 protocol_family_t *protocol)
3859{
3860 struct ip *ip;
3861 u_int ip_version;
3862
3863 while (data != NULL && mbuf_len(data) < 1) {
3864 data = mbuf_next(data);
3865 }
3866
3867 if (data == NULL) {
3868 return ENOENT;
3869 }
3870
3871 ip = mtod(data, struct ip *);
3872 ip_version = ip->ip_v;
3873
3874 switch (ip_version) {
3875 case 4:
3876 *protocol = PF_INET;
3877 return 0;
3878 case 6:
3879 *protocol = PF_INET6;
3880 return 0;
3881 default:
3882 break;
3883 }
3884
3885 return 0;
3886}
3887
3888static errno_t
3889ipsec_add_proto(__unused ifnet_t interface,
3890 protocol_family_t protocol,
3891 __unused const struct ifnet_demux_desc *demux_array,
3892 __unused u_int32_t demux_count)
3893{
3894 switch (protocol) {
3895 case PF_INET:
3896 return 0;
3897 case PF_INET6:
3898 return 0;
3899 default:
3900 break;
3901 }
3902
3903 return ENOPROTOOPT;
3904}
3905
/*
 * ipsec_del_proto - protocol detach callback.
 *
 * Nothing to undo: ipsec_add_proto() keeps no per-protocol state.
 */
static errno_t
ipsec_del_proto(__unused ifnet_t interface,
    __unused protocol_family_t protocol)
{
	return 0;
}
3912
3913static errno_t
3914ipsec_ioctl(ifnet_t interface,
3915 u_long command,
3916 void *data)
3917{
3918#if IPSEC_NEXUS
3919 struct ipsec_pcb *pcb = ifnet_softc(interface);
3920#endif
3921 errno_t result = 0;
3922
3923 switch (command) {
3924 case SIOCSIFMTU: {
3925#if IPSEC_NEXUS
3926 if (pcb->ipsec_use_netif) {
3927 // Make sure we can fit packets in the channel buffers
3928 if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
3929 result = EINVAL;
3930 } else {
3931 ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
3932 }
3933 } else
3934#endif // IPSEC_NEXUS
3935 {
3936 ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
3937 }
3938 break;
3939 }
3940
3941 case SIOCSIFFLAGS:
3942 /* ifioctl() takes care of it */
3943 break;
3944
3945 case SIOCSIFSUBFAMILY: {
3946 uint32_t subfamily;
3947
3948 subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily;
3949 switch (subfamily) {
3950 case IFRTYPE_SUBFAMILY_BLUETOOTH:
3951 interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH;
3952 break;
3953 case IFRTYPE_SUBFAMILY_WIFI:
3954 interface->if_subfamily = IFNET_SUBFAMILY_WIFI;
3955 break;
3956 case IFRTYPE_SUBFAMILY_QUICKRELAY:
3957 interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY;
3958 break;
3959 case IFRTYPE_SUBFAMILY_DEFAULT:
3960 interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT;
3961 break;
3962 default:
3963 result = EINVAL;
3964 break;
3965 }
3966 break;
3967 }
3968
3969 default:
3970 result = EOPNOTSUPP;
3971 }
3972
3973 return result;
3974}
3975
/*
 * ipsec_detached - ifnet detached callback.
 *
 * Runs asynchronously once the interface's I/O reference count drops to
 * zero after ifnet_detach().  Releases the ifnet reference taken at
 * attach time, then frees the PCB and disposes of the ifnet under the
 * global ipsec lock (ipsec_free_pcb with in_list_locked=true expects
 * the caller to hold it).
 */
static void
ipsec_detached(ifnet_t interface)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	(void)ifnet_release(interface);
	lck_mtx_lock(&ipsec_lock);
	ipsec_free_pcb(pcb, true);
	(void)ifnet_dispose(interface);
	lck_mtx_unlock(&ipsec_lock);
}
3987
3988/* Protocol Handlers */
3989
/*
 * ipsec_proto_input - deliver a decapsulated packet up the stack.
 *
 * Stamps the receive interface, taps BPF/pktap (legacy mode only — in
 * netif mode the nexus path taps instead), and hands the mbuf to
 * proto_input().  On failure the mbuf is freed here and an input error
 * is counted; on success the packet/byte counters are bumped.
 * Always returns 0 (the mbuf is consumed on every path).
 */
static errno_t
ipsec_proto_input(ifnet_t interface,
    protocol_family_t protocol,
    mbuf_t m,
    __unused char *frame_header)
{
	mbuf_pkthdr_setrcvif(m, interface);

#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
	{
		uint32_t af = 0;
		struct ip *ip = mtod(m, struct ip *);
		if (ip->ip_v == 4) {
			af = AF_INET;
		} else if (ip->ip_v == 6) {
			af = AF_INET6;
		}
		bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
		pktap_input(interface, protocol, m, NULL);
	}

	/* Capture the length before proto_input() may consume the mbuf. */
	int32_t pktlen = m->m_pkthdr.len;
	if (proto_input(protocol, m) != 0) {
		ifnet_stat_increment_in(interface, 0, 0, 1);
		m_freem(m);
	} else {
		ifnet_stat_increment_in(interface, 1, pktlen, 0);
	}

	return 0;
}
4024
4025static errno_t
4026ipsec_proto_pre_output(__unused ifnet_t interface,
4027 protocol_family_t protocol,
4028 __unused mbuf_t *packet,
4029 __unused const struct sockaddr *dest,
4030 __unused void *route,
4031 __unused char *frame_type,
4032 __unused char *link_layer_dest)
4033{
4034 *(protocol_family_t *)(void *)frame_type = protocol;
4035 return 0;
4036}
4037
4038static errno_t
4039ipsec_attach_proto(ifnet_t interface,
4040 protocol_family_t protocol)
4041{
4042 struct ifnet_attach_proto_param proto;
4043 errno_t result;
4044
4045 bzero(&proto, sizeof(proto));
4046 proto.input = ipsec_proto_input;
4047 proto.pre_output = ipsec_proto_pre_output;
4048
4049 result = ifnet_attach_protocol(interface, protocol, &proto);
4050 if (result != 0 && result != EEXIST) {
4051 os_log_error(OS_LOG_DEFAULT, "ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
4052 protocol, result);
4053 }
4054
4055 return result;
4056}
4057
/*
 * ipsec_inject_inbound_packet - inject a (possibly chained) packet into
 * the interface's inbound path.
 *
 * Netif mode: appends the chain to the pending input list under the
 * input-chain mutex (holding the PCB rwlock shared), then kicks the RX
 * ring.  Returns ENXIO if the data path is stopped, ENOSPC if the
 * pending queue is over if_ipsec_max_pending_input, else 0.
 * Legacy mode: classifies via ipsec_demux() and delivers synchronously
 * through ipsec_proto_input().
 */
errno_t
ipsec_inject_inbound_packet(ifnet_t interface,
    mbuf_t packet)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	if (pcb->ipsec_use_netif) {
		/* Take a data-path reference; fails once teardown has begun. */
		if (!ipsec_data_move_begin(pcb)) {
			os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__,
			    if_name(pcb->ipsec_ifp));
			return ENXIO;
		}

		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		lck_mtx_lock(&pcb->ipsec_input_chain_lock);

		if (pcb->ipsec_input_chain_count > (u_int32_t)if_ipsec_max_pending_input) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
			ipsec_data_move_end(pcb);
			return ENOSPC;
		}

		/* Append the new chain and walk it to update the count and tail. */
		if (pcb->ipsec_input_chain != NULL) {
			pcb->ipsec_input_chain_last->m_nextpkt = packet;
		} else {
			pcb->ipsec_input_chain = packet;
		}
		pcb->ipsec_input_chain_count++;
		while (packet->m_nextpkt) {
			VERIFY(packet != packet->m_nextpkt);
			packet = packet->m_nextpkt;
			pcb->ipsec_input_chain_count++;
		}
		pcb->ipsec_input_chain_last = packet;
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		/* Snapshot the ring pointer before dropping the PCB lock. */
		kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	} else
#endif // IPSEC_NEXUS
	{
		errno_t error;
		protocol_family_t protocol;
		if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
			return error;
		}

		return ipsec_proto_input(interface, protocol, packet, NULL);
	}
}
4118
4119void
4120ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family)
4121{
4122 if (packet != NULL && interface != NULL) {
4123 struct ipsec_pcb *pcb = ifnet_softc(interface);
4124 if (pcb != NULL) {
4125 /* Set traffic class, set flow */
4126 m_set_service_class(packet, pcb->ipsec_output_service_class);
4127 packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
4128 packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
4129 if (family == AF_INET) {
4130 struct ip *ip = mtod(packet, struct ip *);
4131 packet->m_pkthdr.pkt_proto = ip->ip_p;
4132 } else if (family == AF_INET6) {
4133 struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
4134 packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
4135 }
4136 packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
4137 }
4138 }
4139}
4140
4141void
4142ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
4143{
4144 struct ipsec_pcb *pcb;
4145
4146 if (interface == NULL || ipoa == NULL) {
4147 return;
4148 }
4149 pcb = ifnet_softc(interface);
4150
4151 if (net_qos_policy_restricted == 0) {
4152 ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
4153 ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4154 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4155 net_qos_policy_restrict_avapps != 0) {
4156 ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4157 } else {
4158 ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4159 ipoa->ipoa_sotc = SO_TC_VO;
4160 }
4161}
4162
4163void
4164ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
4165{
4166 struct ipsec_pcb *pcb;
4167
4168 if (interface == NULL || ip6oa == NULL) {
4169 return;
4170 }
4171 pcb = ifnet_softc(interface);
4172
4173 if (net_qos_policy_restricted == 0) {
4174 ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED;
4175 ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4176 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
4177 net_qos_policy_restrict_avapps != 0) {
4178 ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4179 } else {
4180 ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4181 ip6oa->ip6oa_sotc = SO_TC_VO;
4182 }
4183}
4184
4185
/*
 * ipsec_data_move_drain - wait for all in-flight data-path threads.
 *
 * Caller must already have cleared the data-path-ready flag (see
 * ipsec_wait_data_move_drain) so no new threads can enter.  Registers
 * as a drainer and sleeps on the data-move counter until every thread
 * that entered via ipsec_data_move_begin() has called
 * ipsec_data_move_end() (which wakes this channel).
 */
static void
ipsec_data_move_drain(struct ipsec_pcb *pcb)
{
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	/* data path must already be marked as not ready */
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	pcb->ipsec_pcb_drainers++;
	while (pcb->ipsec_pcb_data_move != 0) {
		(void)msleep(&(pcb->ipsec_pcb_data_move), &pcb->ipsec_pcb_data_move_lock,
		    (PZERO - 1), __func__, NULL);
	}
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	VERIFY(pcb->ipsec_pcb_drainers > 0);
	pcb->ipsec_pcb_drainers--;
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
}
4202
/*
 * ipsec_wait_data_move_drain - stop the data path and wait it out.
 *
 * Two-step teardown barrier: first clears the data-path-ready flag
 * under the data-move lock (so ipsec_data_move_begin() refuses new
 * entrants), then drains until all threads already inside have left.
 */
static void
ipsec_wait_data_move_drain(struct ipsec_pcb *pcb)
{
	/*
	 * Mark the data path as not usable.
	 */
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	IPSEC_CLR_DATA_PATH_READY(pcb);
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);

	/* Wait until all threads in the data paths are done. */
	ipsec_data_move_drain(pcb);
}