/* apple/xnu (xnu-6153.11.26): bsd/net/if_ipsec.c */
/*
 * Copyright (c) 2012-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/systm.h>
#include <sys/kern_control.h>
#include <net/kpi_protocol.h>
#include <net/kpi_interface.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/if_ipsec.h>
#include <sys/mbuf.h>
#include <sys/sockio.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <sys/kauth.h>
#include <netinet6/ipsec.h>
#include <netinet6/ipsec6.h>
#include <netinet6/esp.h>
#include <netinet6/esp6.h>
#include <netinet/ip.h>
#include <net/flowadv.h>
#include <net/necp.h>
#include <netkey/key.h>
#include <net/pktap.h>
#include <kern/zalloc.h>
#include <os/log.h>

#define IPSEC_NEXUS 0
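/*
 * IPSEC_NEXUS gates the Skywalk netif/kernel-pipe data path implemented
 * below; when it is 0, only the classic kernel-control/mbuf path is built.
 */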

extern int net_qos_policy_restricted;
extern int net_qos_policy_restrict_avapps;

/* Kernel Control functions */
static errno_t ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo);
static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo);
static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
    void *unitinfo);
static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
    void *unitinfo, mbuf_t m, int flags);
static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
    int opt, void *data, size_t *len);
static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
    int opt, void *data, size_t len);

/* Network Interface functions */
static void ipsec_start(ifnet_t interface);
static errno_t ipsec_output(ifnet_t interface, mbuf_t data);
static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
    protocol_family_t *protocol);
static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
    const struct ifnet_demux_desc *demux_array,
    u_int32_t demux_count);
static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
static void ipsec_detached(ifnet_t interface);

/* Protocol handlers */
static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
    mbuf_t m, char *frame_header);
static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route,
    char *frame_type, char *link_layer_dest);

static kern_ctl_ref ipsec_kctlref;
static lck_attr_t *ipsec_lck_attr;
static lck_grp_attr_t *ipsec_lck_grp_attr;
static lck_grp_t *ipsec_lck_grp;
static lck_mtx_t ipsec_lock;

#if IPSEC_NEXUS

SYSCTL_DECL(_net_ipsec);
SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
static int if_ipsec_verify_interface_creation = 0;
SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");

#define IPSEC_IF_VERIFY(_e) if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }

#define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
#define IPSEC_IF_DEFAULT_RING_SIZE 64
#define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
#define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
#define IPSEC_IF_DEFAULT_BUF_SEG_SIZE skmem_usr_buf_seg_size

#define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
#define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
#define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
#define IPSEC_NETIF_WMM_RX_RING_COUNT 1
#define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
#define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT

#define IPSEC_IF_MIN_RING_SIZE 8
#define IPSEC_IF_MAX_RING_SIZE 1024

#define IPSEC_IF_MIN_SLOT_SIZE 1024
#define IPSEC_IF_MAX_SLOT_SIZE 4096

#define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512

static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT;

static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;

static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;

SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");

static int if_ipsec_debug = 0;
SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "");

static errno_t
ipsec_register_nexus(void);

typedef struct ipsec_nx {
    uuid_t if_provider;
    uuid_t if_instance;
    uuid_t fsw_provider;
    uuid_t fsw_instance;
    uuid_t fsw_device;
    uuid_t fsw_host;
    uuid_t fsw_agent;
} *ipsec_nx_t;

static nexus_controller_t ipsec_ncd;
static int ipsec_ncd_refcount;
static uuid_t ipsec_kpipe_uuid;

#endif // IPSEC_NEXUS

/* Control block allocated for each kernel control connection */
struct ipsec_pcb {
    TAILQ_ENTRY(ipsec_pcb) ipsec_chain;
    kern_ctl_ref ipsec_ctlref;
    ifnet_t ipsec_ifp;
    u_int32_t ipsec_unit;
    u_int32_t ipsec_unique_id;
    // These external flags can be set with IPSEC_OPT_FLAGS
    u_int32_t ipsec_external_flags;
    // These internal flags are only used within this driver
    u_int32_t ipsec_internal_flags;
    u_int32_t ipsec_input_frag_size;
    bool ipsec_frag_size_set;
    int ipsec_ext_ifdata_stats;
    mbuf_svc_class_t ipsec_output_service_class;
    char ipsec_if_xname[IFXNAMSIZ];
    char ipsec_unique_name[IFXNAMSIZ];
    // PCB lock protects state fields, like ipsec_kpipe_count
    decl_lck_rw_data(, ipsec_pcb_lock);
    // lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
    decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
    u_int32_t ipsec_pcb_data_move; /* number of data moving contexts */
    u_int32_t ipsec_pcb_drainers; /* number of threads waiting to drain */
    u_int32_t ipsec_pcb_data_path_state; /* internal state of interface data path */

#if IPSEC_NEXUS
    lck_mtx_t ipsec_input_chain_lock;
    lck_mtx_t ipsec_kpipe_encrypt_lock;
    lck_mtx_t ipsec_kpipe_decrypt_lock;
    struct mbuf *ipsec_input_chain;
    struct mbuf *ipsec_input_chain_last;
    u_int32_t ipsec_input_chain_count;
    // Input chain lock protects the list of input mbufs
    // The input chain lock must be taken AFTER the PCB lock if both are held
    struct ipsec_nx ipsec_nx;
    u_int32_t ipsec_kpipe_count;
    pid_t ipsec_kpipe_pid;
    uuid_t ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];
    void *ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];
    void *ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];
    kern_pbufpool_t ipsec_kpipe_pp;
    u_int32_t ipsec_kpipe_tx_ring_size;
    u_int32_t ipsec_kpipe_rx_ring_size;

    kern_nexus_t ipsec_netif_nexus;
    kern_pbufpool_t ipsec_netif_pp;
    void *ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
    void *ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
    uint64_t ipsec_netif_txring_size;

    u_int32_t ipsec_slot_size;
    u_int32_t ipsec_netif_ring_size;
    u_int32_t ipsec_tx_fsw_ring_size;
    u_int32_t ipsec_rx_fsw_ring_size;
    bool ipsec_use_netif;
    bool ipsec_needs_netagent;
#endif // IPSEC_NEXUS
};

/* These are internal flags not exposed outside this file */
#define IPSEC_FLAGS_KPIPE_ALLOCATED 1

/* data movement refcounting functions */
static boolean_t ipsec_data_move_begin(struct ipsec_pcb *pcb);
static void ipsec_data_move_end(struct ipsec_pcb *pcb);
static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);
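/*
 * Usage note: each data-path entry point below brackets its work with
 * ipsec_data_move_begin()/ipsec_data_move_end(), and
 * ipsec_wait_data_move_drain() blocks until every in-flight context has
 * finished, which is how disconnect tears the data path down safely.
 */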

/* Data path states */
#define IPSEC_PCB_DATA_PATH_READY 0x1

/* Macros to set/clear/test data path states */
#define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
#define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
#define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)

#if IPSEC_NEXUS
/* Macros to clear/set/test flags. */
static inline void
ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag)
{
    pcb->ipsec_internal_flags |= flag;
}
static inline void
ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag)
{
    pcb->ipsec_internal_flags &= ~flag;
}

static inline bool
ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag)
{
    return !!(pcb->ipsec_internal_flags & flag);
}
#endif // IPSEC_NEXUS

TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;

#define IPSEC_PCB_ZONE_MAX 32
#define IPSEC_PCB_ZONE_NAME "net.if_ipsec"

static unsigned int ipsec_pcb_size; /* size of zone element */
static struct zone *ipsec_pcb_zone; /* zone for ipsec_pcb */

#define IPSECQ_MAXLEN 256

#if IPSEC_NEXUS
static int
sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    int value = if_ipsec_ring_size;

    int error = sysctl_handle_int(oidp, &value, 0, req);
    if (error || !req->newptr) {
        return error;
    }

    if (value < IPSEC_IF_MIN_RING_SIZE ||
        value > IPSEC_IF_MAX_RING_SIZE) {
        return EINVAL;
    }

    if_ipsec_ring_size = value;

    return 0;
}

static int
sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    int value = if_ipsec_tx_fsw_ring_size;

    int error = sysctl_handle_int(oidp, &value, 0, req);
    if (error || !req->newptr) {
        return error;
    }

    if (value < IPSEC_IF_MIN_RING_SIZE ||
        value > IPSEC_IF_MAX_RING_SIZE) {
        return EINVAL;
    }

    if_ipsec_tx_fsw_ring_size = value;

    return 0;
}

static int
sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    int value = if_ipsec_rx_fsw_ring_size;

    int error = sysctl_handle_int(oidp, &value, 0, req);
    if (error || !req->newptr) {
        return error;
    }

    if (value < IPSEC_IF_MIN_RING_SIZE ||
        value > IPSEC_IF_MAX_RING_SIZE) {
        return EINVAL;
    }

    if_ipsec_rx_fsw_ring_size = value;

    return 0;
}


static inline bool
ipsec_in_wmm_mode(struct ipsec_pcb *pcb)
{
    return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT;
}

#endif // IPSEC_NEXUS

errno_t
ipsec_register_control(void)
{
    struct kern_ctl_reg kern_ctl;
    errno_t result = 0;

    ipsec_pcb_size = sizeof(struct ipsec_pcb);
    ipsec_pcb_zone = zinit(ipsec_pcb_size,
        IPSEC_PCB_ZONE_MAX * ipsec_pcb_size,
        0, IPSEC_PCB_ZONE_NAME);
    if (ipsec_pcb_zone == NULL) {
        os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - zinit(ipsec_pcb) failed");
        return ENOMEM;
    }

#if IPSEC_NEXUS
    ipsec_register_nexus();
#endif // IPSEC_NEXUS

    TAILQ_INIT(&ipsec_head);

    bzero(&kern_ctl, sizeof(kern_ctl));
    strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
    kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
    kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; /* Require root */
    kern_ctl.ctl_sendsize = 64 * 1024;
    kern_ctl.ctl_recvsize = 64 * 1024;
    kern_ctl.ctl_bind = ipsec_ctl_bind;
    kern_ctl.ctl_connect = ipsec_ctl_connect;
    kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
    kern_ctl.ctl_send = ipsec_ctl_send;
    kern_ctl.ctl_setopt = ipsec_ctl_setopt;
    kern_ctl.ctl_getopt = ipsec_ctl_getopt;

    result = ctl_register(&kern_ctl, &ipsec_kctlref);
    if (result != 0) {
        os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n", result);
        return result;
    }

    /* Register the protocol plumbers */
    if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC,
        ipsec_attach_proto, NULL)) != 0) {
        os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
            result);
        ctl_deregister(ipsec_kctlref);
        return result;
    }

    /* Register the protocol plumbers */
    if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC,
        ipsec_attach_proto, NULL)) != 0) {
        proto_unregister_plumber(PF_INET, IFNET_FAMILY_IPSEC);
        ctl_deregister(ipsec_kctlref);
        os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
            result);
        return result;
    }

    ipsec_lck_attr = lck_attr_alloc_init();
    ipsec_lck_grp_attr = lck_grp_attr_alloc_init();
    ipsec_lck_grp = lck_grp_alloc_init("ipsec", ipsec_lck_grp_attr);
    lck_mtx_init(&ipsec_lock, ipsec_lck_grp, ipsec_lck_attr);

    return 0;
}

/* Helpers */
int
ipsec_interface_isvalid(ifnet_t interface)
{
    struct ipsec_pcb *pcb = NULL;

    if (interface == NULL) {
        return 0;
    }

    pcb = ifnet_softc(interface);

    if (pcb == NULL) {
        return 0;
    }

    /* When ctl disconnects, ipsec_unit is set to 0 */
    if (pcb->ipsec_unit == 0) {
        return 0;
    }

    return 1;
}

#if IPSEC_NEXUS
boolean_t
ipsec_interface_needs_netagent(ifnet_t interface)
{
    struct ipsec_pcb *pcb = NULL;

    if (interface == NULL) {
        return FALSE;
    }

    pcb = ifnet_softc(interface);

    if (pcb == NULL) {
        return FALSE;
    }

    return pcb->ipsec_needs_netagent == true;
}
#endif // IPSEC_NEXUS

static errno_t
ipsec_ifnet_set_attrs(ifnet_t ifp)
{
    /* Set flags and additional information. */
    ifnet_set_mtu(ifp, 1500);
    ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);

    /* The interface must generate its own IPv6 LinkLocal address,
     * if possible following the recommendation of RFC2472 to the 64bit interface ID
     */
    ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);

#if !IPSEC_NEXUS
    /* Reset the stats in case the interface has been recycled */
    struct ifnet_stats_param stats;
    bzero(&stats, sizeof(struct ifnet_stats_param));
    ifnet_set_stat(ifp, &stats);
#endif // !IPSEC_NEXUS

    return 0;
}

#if IPSEC_NEXUS

static uuid_t ipsec_nx_dom_prov;

static errno_t
ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
{
    return 0;
}

static void
ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
{
    // Ignore
}

static errno_t
ipsec_register_nexus(void)
{
    const struct kern_nexus_domain_provider_init dp_init = {
        .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
        .nxdpi_flags = 0,
        .nxdpi_init = ipsec_nxdp_init,
        .nxdpi_fini = ipsec_nxdp_fini
    };
    errno_t err = 0;

    /* ipsec_nxdp_init() is called before this function returns */
    err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
        (const uint8_t *) "com.apple.ipsec",
        &dp_init, sizeof(dp_init),
        &ipsec_nx_dom_prov);
    if (err != 0) {
        os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
        return err;
    }
    return 0;
}

static errno_t
ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
{
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    pcb->ipsec_netif_nexus = nexus;
    return ipsec_ifnet_set_attrs(ifp);
}

static errno_t
ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
    proc_t p, kern_nexus_t nexus,
    nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
{
#pragma unused(nxprov, p)
#pragma unused(nexus, nexus_port, channel, ch_ctx)
    return 0;
}

static errno_t
ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
    /* Mark the data path as ready */
    if (ok) {
        lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
        IPSEC_SET_DATA_PATH_READY(pcb);
        lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
    }
    return ok ? 0 : ENXIO;
}

static void
ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    VERIFY(pcb->ipsec_kpipe_count != 0);

    /* Wait until all threads in the data paths are done. */
    ipsec_wait_data_move_drain(pcb);
}

static void
ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    /* Wait until all threads in the data paths are done. */
    ipsec_wait_data_move_drain(pcb);
}

static void
ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    if (pcb->ipsec_netif_nexus == nexus) {
        pcb->ipsec_netif_nexus = NULL;
    }
    ifnet_decr_iorefcnt(pcb->ipsec_ifp);
}

static errno_t
ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
#pragma unused(nxprov)
#pragma unused(channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    uint8_t ring_idx;

    for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
        if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->ipsec_kpipe_uuid[ring_idx])) {
            break;
        }
    }

    if (ring_idx == pcb->ipsec_kpipe_count) {
        uuid_string_t uuidstr;
        uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
        os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->ipsec_if_xname, uuidstr);
        return ENOENT;
    }

    *ring_ctx = (void *)(uintptr_t)ring_idx;

    if (!is_tx_ring) {
        VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL);
        pcb->ipsec_kpipe_rxring[ring_idx] = ring;
    } else {
        VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL);
        pcb->ipsec_kpipe_txring[ring_idx] = ring;
    }
    return 0;
}

static void
ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov)
    bool found = false;
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
        if (pcb->ipsec_kpipe_rxring[i] == ring) {
            pcb->ipsec_kpipe_rxring[i] = NULL;
            found = true;
        } else if (pcb->ipsec_kpipe_txring[i] == ring) {
            pcb->ipsec_kpipe_txring[i] = NULL;
            found = true;
        }
    }
    VERIFY(found);
}

static errno_t
ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    if (!ipsec_data_move_begin(pcb)) {
        os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
        return 0;
    }

    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

    if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        ipsec_data_move_end(pcb);
        return 0;
    }

    VERIFY(pcb->ipsec_kpipe_count);

    kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
    if (tx_slot == NULL) {
        // Nothing to write, bail
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        ipsec_data_move_end(pcb);
        return 0;
    }

    // Signal the netif ring to read
    kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
    lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

    if (rx_ring != NULL) {
        kern_channel_notify(rx_ring, 0);
    }

    ipsec_data_move_end(pcb);
    return 0;
}

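/*
 * Run an outbound mbuf through the IPsec output path
 * (ipsec4/ipsec6_interface_output) for this interface and return the
 * encrypted mbuf, or NULL if the packet was consumed or dropped.
 */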
static mbuf_t
ipsec_encrypt_mbuf(ifnet_t interface,
    mbuf_t data)
{
    struct ipsec_output_state ipsec_state;
    int error = 0;
    uint32_t af;

    // Make sure this packet isn't looping through the interface
    if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
        error = -1;
        goto ipsec_output_err;
    }

    // Mark the interface so NECP can evaluate tunnel policy
    necp_mark_packet_from_interface(data, interface);

    struct ip *ip = mtod(data, struct ip *);
    u_int ip_version = ip->ip_v;

    switch (ip_version) {
    case 4: {
        af = AF_INET;

        memset(&ipsec_state, 0, sizeof(ipsec_state));
        ipsec_state.m = data;
        ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
        memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

        error = ipsec4_interface_output(&ipsec_state, interface);
        if (error == 0 && ipsec_state.tunneled == 6) {
            // Tunneled in IPv6 - packet is gone
            // TODO: Don't lose mbuf
            data = NULL;
            goto done;
        }

        data = ipsec_state.m;
        if (error || data == NULL) {
            if (error) {
                os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec4_output error %d\n", error);
            }
            goto ipsec_output_err;
        }
        goto done;
    }
    case 6: {
        af = AF_INET6;

        data = ipsec6_splithdr(data);
        if (data == NULL) {
            os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
            goto ipsec_output_err;
        }

        struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

        memset(&ipsec_state, 0, sizeof(ipsec_state));
        ipsec_state.m = data;
        ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
        memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

        error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
        if (error == 0 && ipsec_state.tunneled == 4) {
            // Tunneled in IPv4 - packet is gone
            // TODO: Don't lose mbuf
            data = NULL;
            goto done;
        }
        data = ipsec_state.m;
        if (error || data == NULL) {
            if (error) {
                os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_output error %d\n", error);
            }
            goto ipsec_output_err;
        }
        goto done;
    }
    default: {
        os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
        error = -1;
        goto ipsec_output_err;
    }
    }

done:
    return data;

ipsec_output_err:
    if (data) {
        mbuf_freem(data);
    }
    return NULL;
}

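/*
 * Kernel-pipe RX sync: pull outbound packets staged on the matching netif TX
 * ring, encrypt each one via ipsec_encrypt_mbuf(), and hand the ciphertext to
 * the userspace channel through this kpipe RX ring.
 */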
static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    struct kern_channel_ring_stat_increment rx_ring_stats;
    uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);

    if (!ipsec_data_move_begin(pcb)) {
        os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
        return 0;
    }

    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

    if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        ipsec_data_move_end(pcb);
        return 0;
    }

    VERIFY(pcb->ipsec_kpipe_count);
    VERIFY(ring_idx <= pcb->ipsec_kpipe_count);

    // Reclaim user-released slots
    (void) kern_channel_reclaim(rx_ring);

    uint32_t avail = kern_channel_available_slot_count(rx_ring);
    if (avail == 0) {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
    if (tx_ring == NULL) {
        // Net-If TX ring not set up yet, nothing to read
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;

    // Unlock ipsec before entering ring
    lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

    (void)kr_enter(tx_ring, TRUE);

    // Lock again after entering and validate
    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
    if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
        // Ring no longer valid
        // Unlock first, then exit ring
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        kr_exit(tx_ring);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct kern_channel_ring_stat_increment tx_ring_stats;
    bzero(&tx_ring_stats, sizeof(tx_ring_stats));
    kern_channel_slot_t tx_pslot = NULL;
    kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
    if (tx_slot == NULL) {
        // Nothing to read, don't bother signalling
        // Unlock first, then exit ring
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        kr_exit(tx_ring);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
    VERIFY(rx_pp != NULL);
    bzero(&rx_ring_stats, sizeof(rx_ring_stats));
    kern_channel_slot_t rx_pslot = NULL;
    kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

    while (rx_slot != NULL && tx_slot != NULL) {
        size_t length = 0;
        mbuf_t data = NULL;
        errno_t error = 0;

        // Allocate rx packet
        kern_packet_t rx_ph = 0;
        error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
        if (__improbable(error != 0)) {
            os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
                pcb->ipsec_ifp->if_xname);
            break;
        }

        kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

        // Advance TX ring
        tx_pslot = tx_slot;
        tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

        if (tx_ph == 0) {
            kern_pbufpool_free(rx_pp, rx_ph);
            continue;
        }

        kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
        VERIFY(tx_buf != NULL);
        uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
        VERIFY(tx_baddr != NULL);
        tx_baddr += kern_buflet_get_data_offset(tx_buf);

        bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

        length = MIN(kern_packet_get_data_length(tx_ph),
            pcb->ipsec_slot_size);

        // Increment TX stats
        tx_ring_stats.kcrsi_slots_transferred++;
        tx_ring_stats.kcrsi_bytes_transferred += length;

        if (length > 0) {
            error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
            if (error == 0) {
                error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
                if (error == 0) {
                    // Encrypt and send packet
                    lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
                    data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
                    lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
                } else {
                    os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
                    STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
                    STATS_INC(nifs, NETIF_STATS_DROP);
                    mbuf_freem(data);
                    data = NULL;
                }
            } else {
                os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
                STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
                STATS_INC(nifs, NETIF_STATS_DROP);
            }
        } else {
            os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
            STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
            STATS_INC(nifs, NETIF_STATS_DROP);
        }

        if (data == NULL) {
            os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
            kern_pbufpool_free(rx_pp, rx_ph);
            break;
        }

        length = mbuf_pkthdr_len(data);
        if (length > rx_pp->pp_buflet_size) {
            // Flush data
            mbuf_freem(data);
            kern_pbufpool_free(rx_pp, rx_ph);
            os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
                pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
            continue;
        }

        // Fillout rx packet
        kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
        VERIFY(rx_buf != NULL);
        void *rx_baddr = kern_buflet_get_object_address(rx_buf);
        VERIFY(rx_baddr != NULL);

        // Copy-in data from mbuf to buflet
        mbuf_copydata(data, 0, length, (void *)rx_baddr);
        kern_packet_clear_flow_uuid(rx_ph); // Zero flow id

        // Finalize and attach the packet
        error = kern_buflet_set_data_offset(rx_buf, 0);
        VERIFY(error == 0);
        error = kern_buflet_set_data_length(rx_buf, length);
        VERIFY(error == 0);
        error = kern_packet_finalize(rx_ph);
        VERIFY(error == 0);
        error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
        VERIFY(error == 0);

        STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
        STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);

        rx_ring_stats.kcrsi_slots_transferred++;
        rx_ring_stats.kcrsi_bytes_transferred += length;

        if (!pcb->ipsec_ext_ifdata_stats) {
            ifnet_stat_increment_out(pcb->ipsec_ifp, 1, length, 0);
        }

        mbuf_freem(data);

        rx_pslot = rx_slot;
        rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
    }

    if (rx_pslot) {
        kern_channel_advance_slot(rx_ring, rx_pslot);
        kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
    }

    if (tx_pslot) {
        kern_channel_advance_slot(tx_ring, tx_pslot);
        kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
        (void)kern_channel_reclaim(tx_ring);
    }

    /* always reenable output */
    errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
    if (error != 0) {
        os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
    }

    // Unlock first, then exit ring
    lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

    if (tx_pslot != NULL) {
        kern_channel_notify(tx_ring, 0);
    }
    kr_exit(tx_ring);

    ipsec_data_move_end(pcb);
    return 0;
}

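/*
 * Map a WMM service class to the TX ring index used when the interface runs
 * one ring per access category (voice, video, best effort, background).
 */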
static uint8_t
ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
{
    switch (svc_class) {
    case KPKT_SC_VO: {
        return 0;
    }
    case KPKT_SC_VI: {
        return 1;
    }
    case KPKT_SC_BE: {
        return 2;
    }
    case KPKT_SC_BK: {
        return 3;
    }
    default: {
        VERIFY(0);
        return 0;
    }
    }
}

static errno_t
ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
#pragma unused(nxprov)
#pragma unused(channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    if (!is_tx_ring) {
        VERIFY(pcb->ipsec_netif_rxring[0] == NULL);
        pcb->ipsec_netif_rxring[0] = ring;
    } else {
        uint8_t ring_idx = 0;
        if (ipsec_in_wmm_mode(pcb)) {
            int err;
            kern_packet_svc_class_t svc_class;
            err = kern_channel_get_service_class(ring, &svc_class);
            VERIFY(err == 0);
            ring_idx = ipsec_find_tx_ring_by_svc(svc_class);
            VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT);
        }

        *ring_ctx = (void *)(uintptr_t)ring_idx;

        VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL);
        pcb->ipsec_netif_txring[ring_idx] = ring;
    }
    return 0;
}

static void
ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    bool found = false;

    for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) {
        if (pcb->ipsec_netif_rxring[i] == ring) {
            pcb->ipsec_netif_rxring[i] = NULL;
            VERIFY(!found);
            found = true;
        }
    }
    for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) {
        if (pcb->ipsec_netif_txring[i] == ring) {
            pcb->ipsec_netif_txring[i] = NULL;
            VERIFY(!found);
            found = true;
        }
    }
    VERIFY(found);
}

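/*
 * Re-evaluate NECP policy for a packet being injected from the netif TX ring
 * into the BSD stack, so drop and flow-divert verdicts that belong at the IP
 * layer are still honored.
 */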
static bool
ipsec_netif_check_policy(mbuf_t data)
{
    necp_kernel_policy_result necp_result = 0;
    necp_kernel_policy_result_parameter necp_result_parameter = {};
    uint32_t necp_matched_policy_id = 0;

    // This packet has been marked with IP level policy, do not mark again.
    if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
        return true;
    }

    size_t length = mbuf_pkthdr_len(data);
    if (length < sizeof(struct ip)) {
        return false;
    }

    struct ip *ip = mtod(data, struct ip *);
    u_int ip_version = ip->ip_v;
    switch (ip_version) {
    case 4: {
        necp_matched_policy_id = necp_ip_output_find_policy_match(data, 0, NULL, NULL,
            &necp_result, &necp_result_parameter);
        break;
    }
    case 6: {
        necp_matched_policy_id = necp_ip6_output_find_policy_match(data, 0, NULL, NULL,
            &necp_result, &necp_result_parameter);
        break;
    }
    default: {
        return false;
    }
    }

    if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
        necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
        /* Drop and flow divert packets should be blocked at the IP layer */
        return false;
    }

    necp_mark_packet_from_ip(data, necp_matched_policy_id);
    return true;
}

1147ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
0a7de745 1148 kern_channel_ring_t tx_ring, uint32_t flags)
5ba3f43e
A
1149{
1150#pragma unused(nxprov)
1151#pragma unused(flags)
1152 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1153
1154 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1155
cb323159
A
1156 if (!ipsec_data_move_begin(pcb)) {
1157 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1158 return 0;
1159 }
1160
5ba3f43e
A
1161 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1162
1163 struct kern_channel_ring_stat_increment tx_ring_stats;
1164 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
1165 kern_channel_slot_t tx_pslot = NULL;
1166 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
1167
cb323159 1168 STATS_INC(nifs, NETIF_STATS_TX_SYNC);
5ba3f43e
A
1169
1170 if (tx_slot == NULL) {
1171 // Nothing to write, don't bother signalling
1172 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
cb323159 1173 ipsec_data_move_end(pcb);
5ba3f43e
A
1174 return 0;
1175 }
1176
cb323159
A
1177 if (pcb->ipsec_kpipe_count &&
1178 ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
1179 // Select the corresponding kpipe rx ring
1180 uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(tx_ring);
1181 VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT);
1182 kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];
1183
1184 // Unlock while calling notify
5ba3f43e
A
1185 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1186
1187 // Signal the kernel pipe ring to read
1188 if (rx_ring != NULL) {
1189 kern_channel_notify(rx_ring, 0);
1190 }
cb323159
A
1191
1192 ipsec_data_move_end(pcb);
5ba3f43e
A
1193 return 0;
1194 }
1195
1196 // If we're here, we're injecting into the BSD stack
1197 while (tx_slot != NULL) {
1198 size_t length = 0;
1199 mbuf_t data = NULL;
1200
1201 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
1202
1203 // Advance TX ring
1204 tx_pslot = tx_slot;
1205 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1206
1207 if (tx_ph == 0) {
1208 continue;
1209 }
1210
1211 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1212 VERIFY(tx_buf != NULL);
1213 uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
1214 VERIFY(tx_baddr != 0);
1215 tx_baddr += kern_buflet_get_data_offset(tx_buf);
1216
1217 bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
1218
1219 length = MIN(kern_packet_get_data_length(tx_ph),
0a7de745 1220 pcb->ipsec_slot_size);
5ba3f43e
A
1221
1222 if (length > 0) {
1223 errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
1224 if (error == 0) {
1225 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
1226 if (error == 0) {
1227 // Mark packet from policy
1228 uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
1229 necp_mark_packet_from_ip(data, policy_id);
1230
1231 // Check policy with NECP
1232 if (!ipsec_netif_check_policy(data)) {
cb323159
A
1233 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
1234 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
1235 mbuf_freem(data);
1236 data = NULL;
1237 } else {
1238 // Send through encryption
1239 error = ipsec_output(pcb->ipsec_ifp, data);
1240 if (error != 0) {
cb323159 1241 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
5ba3f43e
A
1242 }
1243 }
1244 } else {
cb323159
A
1245 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
1246 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1247 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
1248 mbuf_freem(data);
1249 data = NULL;
1250 }
1251 } else {
cb323159
A
1252 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
1253 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1254 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
1255 }
1256 } else {
cb323159
A
1257 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
1258 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1259 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
1260 }
1261
1262 if (data == NULL) {
cb323159 1263 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
5ba3f43e
A
1264 break;
1265 }
1266
cb323159
A
1267 STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
1268 STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
5ba3f43e
A
1269
1270 tx_ring_stats.kcrsi_slots_transferred++;
1271 tx_ring_stats.kcrsi_bytes_transferred += length;
1272 }
1273
1274 if (tx_pslot) {
1275 kern_channel_advance_slot(tx_ring, tx_pslot);
1276 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1277 (void)kern_channel_reclaim(tx_ring);
1278 }
1279
1280 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
cb323159 1281 ipsec_data_move_end(pcb);
5ba3f43e
A
1282
1283 return 0;
1284}
1285
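/*
 * TX doorbell: refill and sync one netif TX ring, disable further output when
 * the ring has no room, and kick the matching kpipe RX ring. In WMM mode the
 * doorbell wrapper below fans out to all service-class rings.
 */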
static errno_t
ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx)
{
#pragma unused(nxprov)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    boolean_t more = false;
    errno_t rc = 0;

    VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0);

    /*
     * Refill and sync the ring; we may be racing against another thread doing
     * an RX sync that also wants to do kr_enter(), and so use the blocking
     * variant here.
     */
    rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
    if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n", __func__,
            pcb->ipsec_if_xname, ring->ckr_name, rc);
    }

    (void) kr_enter(ring, TRUE);
    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
    if (ring != pcb->ipsec_netif_txring[ring_idx]) {
        // ring no longer valid
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        kr_exit(ring);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n", __func__,
            pcb->ipsec_if_xname, ring->ckr_name, ring_idx);
        return ENXIO;
    }

    if (pcb->ipsec_kpipe_count) {
        uint32_t tx_available = kern_channel_available_slot_count(ring);
        if (pcb->ipsec_netif_txring_size > 0 &&
            tx_available >= pcb->ipsec_netif_txring_size - 1) {
            // No room left in tx ring, disable output for now
            errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
            if (error != 0) {
                os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
            }
        }
    }

    if (pcb->ipsec_kpipe_count) {
        kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];

        // Unlock while calling notify
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        // Signal the kernel pipe ring to read
        if (rx_ring != NULL) {
            kern_channel_notify(rx_ring, 0);
        }
    } else {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
    }

    kr_exit(ring);

    return 0;
}

static errno_t
ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, __unused uint32_t flags)
{
    errno_t ret = 0;
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    if (!ipsec_data_move_begin(pcb)) {
        os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
        return 0;
    }

    if (ipsec_in_wmm_mode(pcb)) {
        for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) {
            kern_channel_ring_t nring = pcb->ipsec_netif_txring[i];
            ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, nring, flags, i);
            if (ret) {
                break;
            }
        }
    } else {
        ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, 0);
    }

    ipsec_data_move_end(pcb);
    return ret;
}

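/*
 * Netif RX sync: drain the pending input mbuf chain into the netif RX ring,
 * fragmenting packets that exceed the buflet or configured frag size, and
 * then, when kernel pipes are attached, pull ciphertext from the kpipe TX
 * rings and decrypt it in place via the ESP input path before delivery.
 */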
static errno_t
ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    struct kern_channel_ring_stat_increment rx_ring_stats;

    struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;

    if (!ipsec_data_move_begin(pcb)) {
        os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
        return 0;
    }

    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

    // Reclaim user-released slots
    (void) kern_channel_reclaim(rx_ring);

    STATS_INC(nifs, NETIF_STATS_RX_SYNC);

    uint32_t avail = kern_channel_available_slot_count(rx_ring);
    if (avail == 0) {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
    VERIFY(rx_pp != NULL);
    bzero(&rx_ring_stats, sizeof(rx_ring_stats));
    kern_channel_slot_t rx_pslot = NULL;
    kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

    while (rx_slot != NULL) {
        // Check for a waiting packet
        lck_mtx_lock(&pcb->ipsec_input_chain_lock);
        mbuf_t data = pcb->ipsec_input_chain;
        if (data == NULL) {
            lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
            break;
        }

        // Allocate rx packet
        kern_packet_t rx_ph = 0;
        errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
        if (__improbable(error != 0)) {
            STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
            STATS_INC(nifs, NETIF_STATS_DROP);
            lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
            break;
        }

        // Advance waiting packets
        if (pcb->ipsec_input_chain_count > 0) {
            pcb->ipsec_input_chain_count--;
        }
        pcb->ipsec_input_chain = data->m_nextpkt;
        data->m_nextpkt = NULL;
        if (pcb->ipsec_input_chain == NULL) {
            pcb->ipsec_input_chain_last = NULL;
        }
        lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

        size_t length = mbuf_pkthdr_len(data);

        if (length < sizeof(struct ip)) {
            // Flush data
            mbuf_freem(data);
            kern_pbufpool_free(rx_pp, rx_ph);
            STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
            STATS_INC(nifs, NETIF_STATS_DROP);
            os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
                pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
            continue;
        }

        uint32_t af = 0;
        struct ip *ip = mtod(data, struct ip *);
        u_int ip_version = ip->ip_v;
        switch (ip_version) {
        case 4: {
            af = AF_INET;
            break;
        }
        case 6: {
            af = AF_INET6;
            break;
        }
        default: {
            os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
                pcb->ipsec_ifp->if_xname, ip_version);
            break;
        }
        }

        if (length > rx_pp->pp_buflet_size ||
            (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
            // We need to fragment to send up into the netif

            u_int32_t fragment_mtu = rx_pp->pp_buflet_size;
            if (pcb->ipsec_frag_size_set &&
                pcb->ipsec_input_frag_size < rx_pp->pp_buflet_size) {
                fragment_mtu = pcb->ipsec_input_frag_size;
            }

            mbuf_t fragment_chain = NULL;
            switch (af) {
            case AF_INET: {
                // ip_fragment expects the length in host order
                ip->ip_len = ntohs(ip->ip_len);

                // ip_fragment will modify the original data, don't free
                int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
                if (fragment_error == 0 && data != NULL) {
                    fragment_chain = data;
                } else {
                    STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
                    STATS_INC(nifs, NETIF_STATS_DROP);
                    os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
                        pcb->ipsec_ifp->if_xname, length, fragment_error);
                }
                break;
            }
            case AF_INET6: {
                if (length < sizeof(struct ip6_hdr)) {
                    mbuf_freem(data);
                    STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
                    STATS_INC(nifs, NETIF_STATS_DROP);
                    os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
                        pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
                } else {
                    // ip6_do_fragmentation will free the original data on success only
                    struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

                    int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
                        ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid()));
                    if (fragment_error == 0 && data != NULL) {
                        fragment_chain = data;
                    } else {
                        mbuf_freem(data);
                        STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
                        STATS_INC(nifs, NETIF_STATS_DROP);
                        os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
                            pcb->ipsec_ifp->if_xname, length, fragment_error);
                    }
                }
                break;
            }
            default: {
                // Cannot fragment unknown families
                mbuf_freem(data);
                STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
                STATS_INC(nifs, NETIF_STATS_DROP);
                os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown legacy decrypted packet length %zu > %u\n",
                    pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
                break;
            }
            }

            if (fragment_chain != NULL) {
                // Add fragments to chain before continuing
                lck_mtx_lock(&pcb->ipsec_input_chain_lock);
                if (pcb->ipsec_input_chain != NULL) {
                    pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
                } else {
                    pcb->ipsec_input_chain = fragment_chain;
                }
                pcb->ipsec_input_chain_count++;
                while (fragment_chain->m_nextpkt) {
                    VERIFY(fragment_chain != fragment_chain->m_nextpkt);
                    fragment_chain = fragment_chain->m_nextpkt;
                    pcb->ipsec_input_chain_count++;
                }
                pcb->ipsec_input_chain_last = fragment_chain;
                lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
            }

            // Make sure to free unused rx packet
            kern_pbufpool_free(rx_pp, rx_ph);

            continue;
        }

        mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);

        // Fillout rx packet
        kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
        VERIFY(rx_buf != NULL);
        void *rx_baddr = kern_buflet_get_object_address(rx_buf);
        VERIFY(rx_baddr != NULL);

        // Copy-in data from mbuf to buflet
        mbuf_copydata(data, 0, length, (void *)rx_baddr);
        kern_packet_clear_flow_uuid(rx_ph); // Zero flow id

        // Finalize and attach the packet
        error = kern_buflet_set_data_offset(rx_buf, 0);
        VERIFY(error == 0);
        error = kern_buflet_set_data_length(rx_buf, length);
        VERIFY(error == 0);
        error = kern_packet_set_headroom(rx_ph, 0);
        VERIFY(error == 0);
        error = kern_packet_finalize(rx_ph);
        VERIFY(error == 0);
        error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
        VERIFY(error == 0);

        STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
        STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
        bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);

        rx_ring_stats.kcrsi_slots_transferred++;
        rx_ring_stats.kcrsi_bytes_transferred += length;

        if (!pcb->ipsec_ext_ifdata_stats) {
            ifnet_stat_increment_in(pcb->ipsec_ifp, 1, length, 0);
        }

        mbuf_freem(data);

        // Advance ring
        rx_pslot = rx_slot;
        rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
    }

cb323159
A
1605 for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
1606 struct kern_channel_ring_stat_increment tx_ring_stats;
1607 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
1608 kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
1609 kern_channel_slot_t tx_pslot = NULL;
1610 kern_channel_slot_t tx_slot = NULL;
1611 if (tx_ring == NULL) {
1612 // Net-If TX ring not set up yet, nothing to read
1613 goto done;
1614 }
5ba3f43e 1615
5ba3f43e 1616
cb323159
A
1617 // Unlock ipsec before entering ring
1618 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
5ba3f43e 1619
cb323159 1620 (void)kr_enter(tx_ring, TRUE);
5ba3f43e 1621
cb323159
A
1622 // Lock again after entering and validate
1623 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
5ba3f43e 1624
cb323159
A
1625 if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
1626 goto done;
1627 }
5ba3f43e 1628
cb323159
A
1629 tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
1630 if (tx_slot == NULL) {
1631 // Nothing to read, don't bother signalling
1632 goto done;
5ba3f43e
A
1633 }
1634
cb323159
A
1635 while (rx_slot != NULL && tx_slot != NULL) {
1636 size_t length = 0;
1637 mbuf_t data = NULL;
1638 errno_t error = 0;
1639 uint32_t af;
1640
1641 // Allocate rx packet
1642 kern_packet_t rx_ph = 0;
1643 error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1644 if (__improbable(error != 0)) {
1645 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
1646 STATS_INC(nifs, NETIF_STATS_DROP);
1647 break;
1648 }
5ba3f43e 1649
cb323159 1650 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
5ba3f43e 1651
cb323159
A
1652 // Advance TX ring
1653 tx_pslot = tx_slot;
1654 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
5ba3f43e 1655
cb323159
A
1656 if (tx_ph == 0) {
1657 kern_pbufpool_free(rx_pp, rx_ph);
1658 continue;
1659 }
5ba3f43e 1660
cb323159
A
1661 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1662 VERIFY(tx_buf != NULL);
1663 uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
1664 VERIFY(tx_baddr != 0);
1665 tx_baddr += kern_buflet_get_data_offset(tx_buf);
5ba3f43e 1666
cb323159
A
1667 length = MIN(kern_packet_get_data_length(tx_ph),
1668 pcb->ipsec_slot_size);
5ba3f43e 1669
cb323159
A
1670 // Increment TX stats
1671 tx_ring_stats.kcrsi_slots_transferred++;
1672 tx_ring_stats.kcrsi_bytes_transferred += length;
1673
1674 if (length >= sizeof(struct ip)) {
1675 error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
5ba3f43e 1676 if (error == 0) {
cb323159
A
1677 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
1678 if (error == 0) {
1679 lck_mtx_lock(&pcb->ipsec_kpipe_decrypt_lock);
1680 struct ip *ip = mtod(data, struct ip *);
1681 u_int ip_version = ip->ip_v;
1682 switch (ip_version) {
1683 case 4: {
1684 af = AF_INET;
1685 ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip);
1686 ip->ip_off = ntohs(ip->ip_off);
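				// Presumably this mirrors what the normal inbound path does before
				// esp4_input(): ip_len/ip_off converted to host byte order, with the
				// header length already subtracted from ip_len (an assumption about
				// esp4_input_extended(), not something stated in this file).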
1687
1688 if (length < ip->ip_len) {
1689 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
1690 pcb->ipsec_ifp->if_xname, length, ip->ip_len);
1691 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1692 STATS_INC(nifs, NETIF_STATS_DROP);
1693 mbuf_freem(data);
1694 data = NULL;
1695 } else {
1696 data = esp4_input_extended(data, sizeof(struct ip), pcb->ipsec_ifp);
1697 }
1698 break;
5ba3f43e 1699 }
cb323159
A
1700 case 6: {
1701 if (length < sizeof(struct ip6_hdr)) {
1702 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
1703 pcb->ipsec_ifp->if_xname, length);
1704 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1705 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
1706 mbuf_freem(data);
1707 data = NULL;
1708 } else {
cb323159
A
1709 af = AF_INET6;
1710 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1711 const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
1712 if (length < ip6_len) {
1713 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
1714 pcb->ipsec_ifp->if_xname, length, ip6_len);
1715 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1716 STATS_INC(nifs, NETIF_STATS_DROP);
1717 mbuf_freem(data);
1718 data = NULL;
1719 } else {
1720 int offset = sizeof(struct ip6_hdr);
1721 esp6_input_extended(&data, &offset, ip6->ip6_nxt, pcb->ipsec_ifp);
1722 }
5ba3f43e 1723 }
cb323159 1724 break;
5ba3f43e 1725 }
cb323159
A
1726 default: {
1727 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown ip version %u\n",
1728 pcb->ipsec_ifp->if_xname, ip_version);
1729 STATS_INC(nifs, NETIF_STATS_DROP);
1730 mbuf_freem(data);
1731 data = NULL;
1732 break;
1733 }
1734 }
1735 lck_mtx_unlock(&pcb->ipsec_kpipe_decrypt_lock);
1736 } else {
1737 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
1738 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1739 STATS_INC(nifs, NETIF_STATS_DROP);
0a7de745
A
1740 mbuf_freem(data);
1741 data = NULL;
5ba3f43e
A
1742 }
1743 } else {
cb323159
A
1744 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
1745 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1746 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
1747 }
1748 } else {
cb323159
A
1749 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb->ipsec_ifp->if_xname, length);
1750 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1751 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e 1752 }
5ba3f43e 1753
cb323159
A
1754 if (data == NULL) {
1755 // Failed to get decrypted data
1756 kern_pbufpool_free(rx_pp, rx_ph);
1757 continue;
1758 }
5ba3f43e 1759
cb323159
A
1760 length = mbuf_pkthdr_len(data);
1761 if (length > rx_pp->pp_buflet_size) {
1762 // Flush data
1763 mbuf_freem(data);
1764 kern_pbufpool_free(rx_pp, rx_ph);
1765 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1766 STATS_INC(nifs, NETIF_STATS_DROP);
1767 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
1768 pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
1769 continue;
1770 }
5ba3f43e 1771
cb323159
A
1772 mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1773
1774 // Fill out rx packet
1775 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1776 VERIFY(rx_buf != NULL);
1777 void *rx_baddr = kern_buflet_get_object_address(rx_buf);
1778 VERIFY(rx_baddr != NULL);
1779
1780 // Copy-in data from mbuf to buflet
1781 mbuf_copydata(data, 0, length, (void *)rx_baddr);
1782 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
1783
1784 // Finalize and attach the packet
1785 error = kern_buflet_set_data_offset(rx_buf, 0);
1786 VERIFY(error == 0);
1787 error = kern_buflet_set_data_length(rx_buf, length);
1788 VERIFY(error == 0);
1789 error = kern_packet_set_link_header_offset(rx_ph, 0);
1790 VERIFY(error == 0);
1791 error = kern_packet_set_network_header_offset(rx_ph, 0);
1792 VERIFY(error == 0);
1793 error = kern_packet_finalize(rx_ph);
1794 VERIFY(error == 0);
1795 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1796 VERIFY(error == 0);
1797
1798 STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
1799 STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
1800 bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1801
1802 rx_ring_stats.kcrsi_slots_transferred++;
1803 rx_ring_stats.kcrsi_bytes_transferred += length;
1804
1805 if (!pcb->ipsec_ext_ifdata_stats) {
1806 ifnet_stat_increment_in(pcb->ipsec_ifp, 1, length, 0);
1807 }
5ba3f43e 1808
cb323159 1809 mbuf_freem(data);
5ba3f43e 1810
cb323159
A
1811 rx_pslot = rx_slot;
1812 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1813 }
5ba3f43e 1814
cb323159
A
1815done:
1816 if (tx_pslot) {
1817 kern_channel_advance_slot(tx_ring, tx_pslot);
1818 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1819 (void)kern_channel_reclaim(tx_ring);
5ba3f43e
A
1820 }
1821
cb323159
A
1822 // Unlock first, then exit ring
1823 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1824 if (tx_ring != NULL) {
1825 if (tx_pslot != NULL) {
1826 kern_channel_notify(tx_ring, 0);
1827 }
1828 kr_exit(tx_ring);
1829 }
5ba3f43e 1830
cb323159 1831 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
5ba3f43e
A
1832 }
1833
5ba3f43e
A
1834 if (rx_pslot) {
1835 kern_channel_advance_slot(rx_ring, rx_pslot);
1836 kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
1837 }
1838
5ba3f43e 1839
5ba3f43e 1840 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
5ba3f43e 1841
cb323159 1842 ipsec_data_move_end(pcb);
5ba3f43e
A
1843 return 0;
1844}
1845
1846static errno_t
1847ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
0a7de745
A
1848 struct ifnet_init_eparams *init_params,
1849 struct ifnet **ifp)
5ba3f43e
A
1850{
1851 errno_t err;
1852 nexus_controller_t controller = kern_nexus_shared_controller();
1853 struct kern_nexus_net_init net_init;
a39ff7e2 1854 struct kern_pbufpool_init pp_init;
5ba3f43e
A
1855
1856 nexus_name_t provider_name;
1857 snprintf((char *)provider_name, sizeof(provider_name),
0a7de745 1858 "com.apple.netif.%s", pcb->ipsec_if_xname);
5ba3f43e
A
1859
1860 struct kern_nexus_provider_init prov_init = {
1861 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1862 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1863 .nxpi_pre_connect = ipsec_nexus_pre_connect,
1864 .nxpi_connected = ipsec_nexus_connected,
1865 .nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
1866 .nxpi_disconnected = ipsec_nexus_disconnected,
1867 .nxpi_ring_init = ipsec_netif_ring_init,
1868 .nxpi_ring_fini = ipsec_netif_ring_fini,
1869 .nxpi_slot_init = NULL,
1870 .nxpi_slot_fini = NULL,
1871 .nxpi_sync_tx = ipsec_netif_sync_tx,
1872 .nxpi_sync_rx = ipsec_netif_sync_rx,
1873 .nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
1874 };
1875
1876 nexus_attr_t nxa = NULL;
1877 err = kern_nexus_attr_create(&nxa);
1878 IPSEC_IF_VERIFY(err == 0);
1879 if (err != 0) {
cb323159 1880 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
0a7de745 1881 __func__, err);
5ba3f43e
A
1882 goto failed;
1883 }
1884
5c9f4661 1885 uint64_t slot_buffer_size = pcb->ipsec_slot_size;
5ba3f43e
A
1886 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1887 VERIFY(err == 0);
1888
1889 // Reset ring size for netif nexus to limit memory usage
5c9f4661 1890 uint64_t ring_size = pcb->ipsec_netif_ring_size;
5ba3f43e
A
1891 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1892 VERIFY(err == 0);
1893 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1894 VERIFY(err == 0);
1895
cb323159
A
1896 assert(err == 0);
1897
1898 if (ipsec_in_wmm_mode(pcb)) {
1899 os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n",
1900 __func__, pcb->ipsec_if_xname);
1901
1902 init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED;
1903
1904 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_RINGS,
1905 IPSEC_NETIF_WMM_TX_RING_COUNT);
1906 VERIFY(err == 0);
1907 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_RINGS,
1908 IPSEC_NETIF_WMM_RX_RING_COUNT);
1909 VERIFY(err == 0);
1910
1911 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_QMAP, NEXUS_QMAP_TYPE_WMM);
1912 VERIFY(err == 0);
1913 }
1914
5ba3f43e
A
1915 pcb->ipsec_netif_txring_size = ring_size;
1916
0a7de745 1917 bzero(&pp_init, sizeof(pp_init));
a39ff7e2 1918 pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
cb323159
A
1919 pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
1920 // Note: we need more packets than can be held in the tx and rx rings because
1921 // packets can also be in the AQM queue(s)
1922 pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1);
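	// Illustrative sizing (assumed numbers, not from this file): with a netif
	// ring size of 64 and ipsec_kpipe_count == 2, this reserves
	// 64 * (2 * 2 + 1) = 320 packets for the pool.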
a39ff7e2
A
1923 pp_init.kbi_bufsize = pcb->ipsec_slot_size;
1924 pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
1925 pp_init.kbi_max_frags = 1;
0a7de745 1926 (void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
a39ff7e2 1927 "%s", provider_name);
cb323159
A
1928 pp_init.kbi_ctx = NULL;
1929 pp_init.kbi_ctx_retain = NULL;
1930 pp_init.kbi_ctx_release = NULL;
a39ff7e2 1931
cb323159 1932 err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL);
a39ff7e2 1933 if (err != 0) {
cb323159 1934 os_log_error(OS_LOG_DEFAULT, "%s pbufpool create failed, error %d\n", __func__, err);
a39ff7e2
A
1935 goto failed;
1936 }
1937
5ba3f43e 1938 err = kern_nexus_controller_register_provider(controller,
0a7de745
A
1939 ipsec_nx_dom_prov,
1940 provider_name,
1941 &prov_init,
1942 sizeof(prov_init),
1943 nxa,
1944 &pcb->ipsec_nx.if_provider);
5ba3f43e
A
1945 IPSEC_IF_VERIFY(err == 0);
1946 if (err != 0) {
cb323159 1947 os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
0a7de745 1948 __func__, err);
5ba3f43e
A
1949 goto failed;
1950 }
1951
1952 bzero(&net_init, sizeof(net_init));
1953 net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
1954 net_init.nxneti_flags = 0;
1955 net_init.nxneti_eparams = init_params;
1956 net_init.nxneti_lladdr = NULL;
1957 net_init.nxneti_prepare = ipsec_netif_prepare;
a39ff7e2 1958 net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
5ba3f43e 1959 err = kern_nexus_controller_alloc_net_provider_instance(controller,
0a7de745
A
1960 pcb->ipsec_nx.if_provider,
1961 pcb,
1962 &pcb->ipsec_nx.if_instance,
1963 &net_init,
1964 ifp);
5ba3f43e
A
1965 IPSEC_IF_VERIFY(err == 0);
1966 if (err != 0) {
cb323159 1967 os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
0a7de745 1968 __func__, err);
5ba3f43e 1969 kern_nexus_controller_deregister_provider(controller,
0a7de745 1970 pcb->ipsec_nx.if_provider);
5ba3f43e
A
1971 uuid_clear(pcb->ipsec_nx.if_provider);
1972 goto failed;
1973 }
1974
1975failed:
1976 if (nxa) {
1977 kern_nexus_attr_destroy(nxa);
1978 }
a39ff7e2
A
1979 if (err && pcb->ipsec_netif_pp != NULL) {
1980 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
1981 pcb->ipsec_netif_pp = NULL;
1982 }
0a7de745 1983 return err;
5ba3f43e
A
1984}
1985
1986static void
1987ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
1988{
1989 nexus_controller_t controller = kern_nexus_shared_controller();
0a7de745 1990 errno_t err;
5ba3f43e
A
1991
1992 if (!uuid_is_null(instance)) {
1993 err = kern_nexus_controller_free_provider_instance(controller,
0a7de745 1994 instance);
5ba3f43e 1995 if (err != 0) {
cb323159 1996 os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
0a7de745 1997 __func__, err);
5ba3f43e
A
1998 }
1999 uuid_clear(instance);
2000 }
2001 if (!uuid_is_null(provider)) {
2002 err = kern_nexus_controller_deregister_provider(controller,
0a7de745 2003 provider);
5ba3f43e 2004 if (err != 0) {
cb323159 2005 os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
5ba3f43e
A
2006 }
2007 uuid_clear(provider);
2008 }
2009 return;
2010}
2011
2012static void
a39ff7e2 2013ipsec_nexus_detach(struct ipsec_pcb *pcb)
5ba3f43e 2014{
a39ff7e2 2015 ipsec_nx_t nx = &pcb->ipsec_nx;
5ba3f43e 2016 nexus_controller_t controller = kern_nexus_shared_controller();
0a7de745 2017 errno_t err;
5ba3f43e 2018
cb323159 2019 if (!uuid_is_null(nx->fsw_host)) {
5ba3f43e 2020 err = kern_nexus_ifdetach(controller,
cb323159
A
2021 nx->fsw_instance,
2022 nx->fsw_host);
5ba3f43e 2023 if (err != 0) {
cb323159 2024 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms host failed %d\n",
0a7de745 2025 __func__, err);
5ba3f43e
A
2026 }
2027 }
2028
cb323159 2029 if (!uuid_is_null(nx->fsw_device)) {
5ba3f43e 2030 err = kern_nexus_ifdetach(controller,
cb323159
A
2031 nx->fsw_instance,
2032 nx->fsw_device);
5ba3f43e 2033 if (err != 0) {
cb323159 2034 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
0a7de745 2035 __func__, err);
5ba3f43e
A
2036 }
2037 }
2038
2039 ipsec_detach_provider_and_instance(nx->if_provider,
0a7de745 2040 nx->if_instance);
cb323159
A
2041 ipsec_detach_provider_and_instance(nx->fsw_provider,
2042 nx->fsw_instance);
5ba3f43e 2043
a39ff7e2
A
2044 if (pcb->ipsec_netif_pp != NULL) {
2045 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2046 pcb->ipsec_netif_pp = NULL;
a39ff7e2 2047 }
5ba3f43e
A
2048 memset(nx, 0, sizeof(*nx));
2049}
2050
2051static errno_t
5c9f4661 2052ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
cb323159 2053 const char *type_name,
0a7de745
A
2054 const char *ifname,
2055 uuid_t *provider, uuid_t *instance)
5ba3f43e
A
2056{
2057 nexus_attr_t attr = NULL;
2058 nexus_controller_t controller = kern_nexus_shared_controller();
2059 uuid_t dom_prov;
2060 errno_t err;
2061 struct kern_nexus_init init;
0a7de745 2062 nexus_name_t provider_name;
5ba3f43e 2063
cb323159 2064 err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
0a7de745 2065 &dom_prov);
5ba3f43e
A
2066 IPSEC_IF_VERIFY(err == 0);
2067 if (err != 0) {
cb323159 2068 os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
0a7de745 2069 __func__, type_name, err);
5ba3f43e
A
2070 goto failed;
2071 }
2072
2073 err = kern_nexus_attr_create(&attr);
2074 IPSEC_IF_VERIFY(err == 0);
2075 if (err != 0) {
cb323159 2076 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
0a7de745 2077 __func__, err);
5ba3f43e
A
2078 goto failed;
2079 }
2080
5c9f4661 2081 uint64_t slot_buffer_size = pcb->ipsec_slot_size;
5ba3f43e
A
2082 err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
2083 VERIFY(err == 0);
2084
2085 // Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
5c9f4661 2086 uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
5ba3f43e
A
2087 err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
2088 VERIFY(err == 0);
5c9f4661 2089 uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
5ba3f43e
A
2090 err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
2091 VERIFY(err == 0);
2092
2093 snprintf((char *)provider_name, sizeof(provider_name),
0a7de745 2094 "com.apple.%s.%s", type_name, ifname);
5ba3f43e 2095 err = kern_nexus_controller_register_provider(controller,
0a7de745
A
2096 dom_prov,
2097 provider_name,
2098 NULL,
2099 0,
2100 attr,
2101 provider);
5ba3f43e
A
2102 kern_nexus_attr_destroy(attr);
2103 attr = NULL;
2104 IPSEC_IF_VERIFY(err == 0);
2105 if (err != 0) {
cb323159 2106 os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
0a7de745 2107 __func__, type_name, err);
5ba3f43e
A
2108 goto failed;
2109 }
0a7de745 2110 bzero(&init, sizeof(init));
5ba3f43e
A
2111 init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
2112 err = kern_nexus_controller_alloc_provider_instance(controller,
0a7de745
A
2113 *provider,
2114 NULL,
2115 instance, &init);
5ba3f43e
A
2116 IPSEC_IF_VERIFY(err == 0);
2117 if (err != 0) {
cb323159 2118 os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
0a7de745 2119 __func__, type_name, err);
5ba3f43e 2120 kern_nexus_controller_deregister_provider(controller,
0a7de745 2121 *provider);
5ba3f43e
A
2122 uuid_clear(*provider);
2123 }
2124failed:
0a7de745 2125 return err;
5ba3f43e
A
2126}
2127
2128static errno_t
cb323159 2129ipsec_flowswitch_attach(struct ipsec_pcb *pcb)
5ba3f43e
A
2130{
2131 nexus_controller_t controller = kern_nexus_shared_controller();
2132 errno_t err = 0;
2133 ipsec_nx_t nx = &pcb->ipsec_nx;
2134
cb323159 2135 // Allocate flowswitch
5c9f4661 2136 err = ipsec_create_fs_provider_and_instance(pcb,
cb323159 2137 "flowswitch",
0a7de745 2138 pcb->ipsec_ifp->if_xname,
cb323159
A
2139 &nx->fsw_provider,
2140 &nx->fsw_instance);
5ba3f43e 2141 if (err != 0) {
cb323159 2142 os_log_error(OS_LOG_DEFAULT, "%s: failed to create flowswitch provider and instance\n",
0a7de745 2143 __func__);
5ba3f43e
A
2144 goto failed;
2145 }
2146
cb323159
A
2147 // Attach flowswitch to device port
2148 err = kern_nexus_ifattach(controller, nx->fsw_instance,
0a7de745 2149 NULL, nx->if_instance,
cb323159 2150 FALSE, &nx->fsw_device);
5ba3f43e 2151 if (err != 0) {
cb323159 2152 os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
5ba3f43e
A
2153 goto failed;
2154 }
2155
cb323159
A
2156 // Attach flowswitch to host port
2157 err = kern_nexus_ifattach(controller, nx->fsw_instance,
0a7de745 2158 NULL, nx->if_instance,
cb323159 2159 TRUE, &nx->fsw_host);
5ba3f43e 2160 if (err != 0) {
cb323159 2161 os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms host %d\n", __func__, err);
5ba3f43e
A
2162 goto failed;
2163 }
2164
2165 // Extract the agent UUID and save for later
cb323159
A
2166 struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
2167 if (flowswitch_nx != NULL) {
2168 struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
5ba3f43e
A
2169 if (flowswitch != NULL) {
2170 FSW_RLOCK(flowswitch);
cb323159 2171 uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
5ba3f43e
A
2172 FSW_UNLOCK(flowswitch);
2173 } else {
cb323159 2174 os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n");
5ba3f43e 2175 }
cb323159 2176 nx_release(flowswitch_nx);
5ba3f43e 2177 } else {
cb323159 2178 os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n");
5ba3f43e
A
2179 }
2180
0a7de745 2181 return 0;
5ba3f43e
A
2182
2183failed:
a39ff7e2 2184 ipsec_nexus_detach(pcb);
5ba3f43e
A
2185
2186 errno_t detach_error = 0;
2187 if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
cb323159 2188 panic("ipsec_flowswitch_attach - ifnet_detach failed: %d\n", detach_error);
5ba3f43e
A
2189 /* NOT REACHED */
2190 }
2191
0a7de745 2192 return err;
5ba3f43e
A
2193}
2194
2195#pragma mark Kernel Pipe Nexus
2196
2197static errno_t
cb323159 2198ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb)
5ba3f43e
A
2199{
2200 nexus_attr_t nxa = NULL;
2201 errno_t result;
2202
2203 lck_mtx_lock(&ipsec_lock);
2204 if (ipsec_ncd_refcount++) {
2205 lck_mtx_unlock(&ipsec_lock);
2206 return 0;
2207 }
2208
2209 result = kern_nexus_controller_create(&ipsec_ncd);
2210 if (result) {
cb323159 2211 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
0a7de745 2212 __FUNCTION__, result);
5ba3f43e
A
2213 goto done;
2214 }
2215
2216 uuid_t dom_prov;
cb323159 2217 result = kern_nexus_get_default_domain_provider(
0a7de745 2218 NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
5ba3f43e 2219 if (result) {
cb323159 2220 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
0a7de745 2221 __FUNCTION__, result);
5ba3f43e
A
2222 goto done;
2223 }
2224
2225 struct kern_nexus_provider_init prov_init = {
2226 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
2227 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
2228 .nxpi_pre_connect = ipsec_nexus_pre_connect,
2229 .nxpi_connected = ipsec_nexus_connected,
2230 .nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
2231 .nxpi_disconnected = ipsec_nexus_disconnected,
2232 .nxpi_ring_init = ipsec_kpipe_ring_init,
2233 .nxpi_ring_fini = ipsec_kpipe_ring_fini,
2234 .nxpi_slot_init = NULL,
2235 .nxpi_slot_fini = NULL,
2236 .nxpi_sync_tx = ipsec_kpipe_sync_tx,
2237 .nxpi_sync_rx = ipsec_kpipe_sync_rx,
2238 .nxpi_tx_doorbell = NULL,
2239 };
2240
2241 result = kern_nexus_attr_create(&nxa);
2242 if (result) {
cb323159 2243 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
0a7de745 2244 __FUNCTION__, result);
5ba3f43e
A
2245 goto done;
2246 }
2247
2248 uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
2249 result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
2250 VERIFY(result == 0);
2251
2252 // Reset ring size for kernel pipe nexus to limit memory usage
cb323159
A
2253 // Note: It's better to have fewer slots on the kpipe TX ring than the netif
2254 // so back pressure is applied at the AQM layer
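	// Sketch of the fallback order: an explicitly configured kpipe TX ring
	// size wins, else the netif ring size, else the global if_ipsec_ring_size
	// default; the RX ring size below is chosen the same way.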
2255 uint64_t ring_size =
2256 pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size :
2257 pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
2258 if_ipsec_ring_size;
5ba3f43e
A
2259 result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
2260 VERIFY(result == 0);
cb323159
A
2261
2262 ring_size =
2263 pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size :
2264 pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
2265 if_ipsec_ring_size;
5ba3f43e
A
2266 result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
2267 VERIFY(result == 0);
2268
2269 result = kern_nexus_controller_register_provider(ipsec_ncd,
0a7de745
A
2270 dom_prov,
2271 (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
2272 &prov_init,
2273 sizeof(prov_init),
2274 nxa,
2275 &ipsec_kpipe_uuid);
5ba3f43e 2276 if (result) {
cb323159 2277 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
0a7de745 2278 __FUNCTION__, result);
5ba3f43e
A
2279 goto done;
2280 }
2281
2282done:
2283 if (nxa) {
2284 kern_nexus_attr_destroy(nxa);
2285 }
2286
2287 if (result) {
2288 if (ipsec_ncd) {
2289 kern_nexus_controller_destroy(ipsec_ncd);
2290 ipsec_ncd = NULL;
2291 }
2292 ipsec_ncd_refcount = 0;
2293 }
2294
2295 lck_mtx_unlock(&ipsec_lock);
2296
2297 return result;
2298}
2299
2300static void
2301ipsec_unregister_kernel_pipe_nexus(void)
2302{
2303 lck_mtx_lock(&ipsec_lock);
2304
2305 VERIFY(ipsec_ncd_refcount > 0);
2306
2307 if (--ipsec_ncd_refcount == 0) {
2308 kern_nexus_controller_destroy(ipsec_ncd);
2309 ipsec_ncd = NULL;
2310 }
2311
2312 lck_mtx_unlock(&ipsec_lock);
2313}
2314
cb323159
A
2315 /* This structure only holds onto kpipe channels that still need to be
2316  * freed later; they are cleared from the pcb while the pcb lock is held.
2317  */
2318struct ipsec_detached_channels {
2319 int count;
2320 kern_pbufpool_t pp;
2321 uuid_t uuids[IPSEC_IF_MAX_RING_COUNT];
2322};
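/*
 * Typical usage, sketched from ipsec_ctl_disconnect() below: the channels are
 * detached while the pcb lock is held exclusively, and only freed once the
 * lock has been dropped:
 *
 *	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
 *	struct ipsec_detached_channels dc;
 *	ipsec_detach_channels(pcb, &dc);
 *	...
 *	lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
 *	ipsec_free_channels(&dc);
 */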
5ba3f43e 2323
cb323159
A
2324static void
2325ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc)
2326{
2327 LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);
5ba3f43e 2328
cb323159
A
2329 if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
2330 for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) {
2331 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2332 }
2333 dc->count = 0;
2334 return;
2335 }
5ba3f43e 2336
cb323159 2337 dc->count = pcb->ipsec_kpipe_count;
39236c6e 2338
cb323159
A
2339 VERIFY(dc->count >= 0);
2340 VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT);
39236c6e 2341
cb323159
A
2342 for (int i = 0; i < dc->count; i++) {
2343 VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2344 uuid_copy(dc->uuids[i], pcb->ipsec_kpipe_uuid[i]);
2345 uuid_clear(pcb->ipsec_kpipe_uuid[i]);
2346 }
2347 for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) {
2348 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2349 }
5ba3f43e 2350
cb323159
A
2351 if (dc->count) {
2352 VERIFY(pcb->ipsec_kpipe_pp);
5ba3f43e 2353 } else {
cb323159 2354 VERIFY(!pcb->ipsec_kpipe_pp);
5ba3f43e
A
2355 }
2356
cb323159
A
2357 dc->pp = pcb->ipsec_kpipe_pp;
2358
2359 pcb->ipsec_kpipe_pp = NULL;
2360
2361 ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
2362}
2363
2364static void
2365ipsec_free_channels(struct ipsec_detached_channels *dc)
2366{
2367 if (!dc->count) {
2368 return;
5ba3f43e
A
2369 }
2370
cb323159
A
2371 for (int i = 0; i < dc->count; i++) {
2372 errno_t result;
2373 result = kern_nexus_controller_free_provider_instance(ipsec_ncd, dc->uuids[i]);
2374 VERIFY(!result);
2375 }
2376
2377 VERIFY(dc->pp);
2378 kern_pbufpool_destroy(dc->pp);
2379
2380 ipsec_unregister_kernel_pipe_nexus();
2381
2382 memset(dc, 0, sizeof(*dc));
5ba3f43e
A
2383}
2384
2385static errno_t
2386ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
39236c6e 2387{
5ba3f43e 2388 struct kern_nexus_init init;
a39ff7e2 2389 struct kern_pbufpool_init pp_init;
5ba3f43e
A
2390 errno_t result;
2391
d9a64523
A
2392 kauth_cred_t cred = kauth_cred_get();
2393 result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
2394 if (result) {
2395 return result;
2396 }
2397
cb323159
A
2398 VERIFY(pcb->ipsec_kpipe_count);
2399 VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED));
5ba3f43e 2400
cb323159 2401 result = ipsec_register_kernel_pipe_nexus(pcb);
5ba3f43e
A
2402
2403 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2404
cb323159
A
2405 if (result) {
2406 os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n",
2407 __func__, pcb->ipsec_if_xname);
a39ff7e2
A
2408 goto done;
2409 }
2410
cb323159 2411 VERIFY(ipsec_ncd);
5ba3f43e 2412
0a7de745 2413 bzero(&pp_init, sizeof(pp_init));
a39ff7e2 2414 pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
cb323159
A
2415 pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
2416 // Note: We only need as many packets as can be held in the tx and rx rings
2417 pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count;
a39ff7e2
A
2418 pp_init.kbi_bufsize = pcb->ipsec_slot_size;
2419 pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
2420 pp_init.kbi_max_frags = 1;
2421 pp_init.kbi_flags |= KBIF_QUANTUM;
0a7de745 2422 (void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
a39ff7e2 2423 "com.apple.kpipe.%s", pcb->ipsec_if_xname);
cb323159
A
2424 pp_init.kbi_ctx = NULL;
2425 pp_init.kbi_ctx_retain = NULL;
2426 pp_init.kbi_ctx_release = NULL;
a39ff7e2 2427
cb323159 2428 result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp,
a39ff7e2
A
2429 NULL);
2430 if (result != 0) {
cb323159
A
2431 os_log_error(OS_LOG_DEFAULT, "%s: %s pbufpool create failed, error %d\n",
2432 __func__, pcb->ipsec_if_xname, result);
a39ff7e2
A
2433 goto done;
2434 }
2435
0a7de745 2436 bzero(&init, sizeof(init));
5ba3f43e 2437 init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
a39ff7e2 2438 init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;
5ba3f43e 2439
cb323159
A
2440 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
2441 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2442 result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
2443 ipsec_kpipe_uuid, pcb, &pcb->ipsec_kpipe_uuid[i], &init);
2444
2445 if (result == 0) {
2446 nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
2447 pid_t pid = pcb->ipsec_kpipe_pid;
2448 if (!pid) {
2449 pid = proc_pid(proc);
2450 }
2451 result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
2452 pcb->ipsec_kpipe_uuid[i], &port,
2453 pid, NULL, NULL, 0, NEXUS_BIND_PID);
2454 }
5ba3f43e 2455
cb323159
A
2456 if (result) {
2457 /* Unwind all of them on error */
2458 for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) {
2459 if (!uuid_is_null(pcb->ipsec_kpipe_uuid[j])) {
2460 kern_nexus_controller_free_provider_instance(ipsec_ncd,
2461 pcb->ipsec_kpipe_uuid[j]);
2462 uuid_clear(pcb->ipsec_kpipe_uuid[j]);
2463 }
2464 }
2465 goto done;
2466 }
2467 }
5ba3f43e
A
2468
2469done:
2470 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
0a7de745 2471
5ba3f43e 2472 if (result) {
a39ff7e2
A
2473 if (pcb->ipsec_kpipe_pp != NULL) {
2474 kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
2475 pcb->ipsec_kpipe_pp = NULL;
2476 }
5ba3f43e 2477 ipsec_unregister_kernel_pipe_nexus();
cb323159
A
2478 } else {
2479 ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
39236c6e 2480 }
0a7de745 2481
5ba3f43e 2482 return result;
39236c6e
A
2483}
2484
5ba3f43e
A
2485#endif // IPSEC_NEXUS
2486
39236c6e
A
2487
2488/* Kernel control functions */
2489
5ba3f43e 2490static inline void
5c9f4661 2491ipsec_free_pcb(struct ipsec_pcb *pcb, bool in_list)
5ba3f43e
A
2492{
2493#if IPSEC_NEXUS
2494 mbuf_freem_list(pcb->ipsec_input_chain);
cb323159 2495 pcb->ipsec_input_chain_count = 0;
5ba3f43e 2496 lck_mtx_destroy(&pcb->ipsec_input_chain_lock, ipsec_lck_grp);
cb323159
A
2497 lck_mtx_destroy(&pcb->ipsec_kpipe_encrypt_lock, ipsec_lck_grp);
2498 lck_mtx_destroy(&pcb->ipsec_kpipe_decrypt_lock, ipsec_lck_grp);
5ba3f43e 2499#endif // IPSEC_NEXUS
cb323159 2500 lck_mtx_destroy(&pcb->ipsec_pcb_data_move_lock, ipsec_lck_grp);
5ba3f43e 2501 lck_rw_destroy(&pcb->ipsec_pcb_lock, ipsec_lck_grp);
5c9f4661
A
2502 if (in_list) {
2503 lck_mtx_lock(&ipsec_lock);
2504 TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
2505 lck_mtx_unlock(&ipsec_lock);
2506 }
5ba3f43e
A
2507 zfree(ipsec_pcb_zone, pcb);
2508}
2509
39236c6e 2510static errno_t
5c9f4661 2511ipsec_ctl_bind(kern_ctl_ref kctlref,
0a7de745
A
2512 struct sockaddr_ctl *sac,
2513 void **unitinfo)
39236c6e 2514{
5ba3f43e
A
2515 struct ipsec_pcb *pcb = zalloc(ipsec_pcb_zone);
2516 memset(pcb, 0, sizeof(*pcb));
39037602 2517
39236c6e 2518 /* Setup the protocol control block */
39236c6e
A
2519 *unitinfo = pcb;
2520 pcb->ipsec_ctlref = kctlref;
2521 pcb->ipsec_unit = sac->sc_unit;
fe8ab488 2522 pcb->ipsec_output_service_class = MBUF_SC_OAM;
5ba3f43e 2523
5c9f4661
A
2524#if IPSEC_NEXUS
2525 pcb->ipsec_use_netif = false;
2526 pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
cb323159
A
2527 pcb->ipsec_netif_ring_size = if_ipsec_ring_size;
2528 pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size;
2529 pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size;
5c9f4661
A
2530#endif // IPSEC_NEXUS
2531
2532 lck_rw_init(&pcb->ipsec_pcb_lock, ipsec_lck_grp, ipsec_lck_attr);
cb323159 2533 lck_mtx_init(&pcb->ipsec_pcb_data_move_lock, ipsec_lck_grp, ipsec_lck_attr);
5c9f4661 2534#if IPSEC_NEXUS
cb323159 2535 pcb->ipsec_input_chain_count = 0;
5c9f4661 2536 lck_mtx_init(&pcb->ipsec_input_chain_lock, ipsec_lck_grp, ipsec_lck_attr);
cb323159
A
2537 lck_mtx_init(&pcb->ipsec_kpipe_encrypt_lock, ipsec_lck_grp, ipsec_lck_attr);
2538 lck_mtx_init(&pcb->ipsec_kpipe_decrypt_lock, ipsec_lck_grp, ipsec_lck_attr);
5c9f4661
A
2539#endif // IPSEC_NEXUS
2540
0a7de745 2541 return 0;
5c9f4661
A
2542}
2543
2544static errno_t
2545ipsec_ctl_connect(kern_ctl_ref kctlref,
0a7de745
A
2546 struct sockaddr_ctl *sac,
2547 void **unitinfo)
5c9f4661
A
2548{
2549 struct ifnet_init_eparams ipsec_init = {};
2550 errno_t result = 0;
2551
2552 if (*unitinfo == NULL) {
2553 (void)ipsec_ctl_bind(kctlref, sac, unitinfo);
2554 }
2555
2556 struct ipsec_pcb *pcb = *unitinfo;
2557
5ba3f43e
A
2558 lck_mtx_lock(&ipsec_lock);
2559
2560 /* Find some open interface id */
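	/*
	 * Worked example (assumed ids, not from this file): if the list holds
	 * unique ids {1, 2, 5}, the tail entry yields 5 and the new id is 6.
	 * Only when the increment wraps to 0 does the loop below rescan from
	 * the head and settle on the first gap, which here would be 3.
	 */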
2561 u_int32_t chosen_unique_id = 1;
2562 struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
2563 if (next_pcb != NULL) {
2564 /* List was not empty, add one to the last item */
2565 chosen_unique_id = next_pcb->ipsec_unique_id + 1;
2566 next_pcb = NULL;
2567
2568 /*
2569 * If this wrapped the id number, start looking at
2570 * the front of the list for an unused id.
2571 */
2572 if (chosen_unique_id == 0) {
2573 /* Find the next unused ID */
2574 chosen_unique_id = 1;
2575 TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
2576 if (next_pcb->ipsec_unique_id > chosen_unique_id) {
2577 /* We found a gap */
2578 break;
2579 }
2580
2581 chosen_unique_id = next_pcb->ipsec_unique_id + 1;
2582 }
2583 }
2584 }
2585
2586 pcb->ipsec_unique_id = chosen_unique_id;
2587
2588 if (next_pcb != NULL) {
2589 TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
2590 } else {
2591 TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
2592 }
2593 lck_mtx_unlock(&ipsec_lock);
2594
2595 snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
2596 snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
cb323159 2597 os_log(OS_LOG_DEFAULT, "ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);
5ba3f43e 2598
39236c6e
A
2599 /* Create the interface */
2600 bzero(&ipsec_init, sizeof(ipsec_init));
2601 ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
0a7de745 2602 ipsec_init.len = sizeof(ipsec_init);
5ba3f43e
A
2603
2604#if IPSEC_NEXUS
5c9f4661
A
2605 if (pcb->ipsec_use_netif) {
2606 ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
2607 } else
5ba3f43e 2608#endif // IPSEC_NEXUS
5c9f4661
A
2609 {
2610 ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
2611 ipsec_init.start = ipsec_start;
2612 }
5ba3f43e 2613 ipsec_init.name = "ipsec";
39236c6e 2614 ipsec_init.unit = pcb->ipsec_unit - 1;
5ba3f43e
A
2615 ipsec_init.uniqueid = pcb->ipsec_unique_name;
2616 ipsec_init.uniqueid_len = strlen(pcb->ipsec_unique_name);
cb323159 2617 ipsec_init.family = IFNET_FAMILY_IPSEC;
39236c6e
A
2618 ipsec_init.type = IFT_OTHER;
2619 ipsec_init.demux = ipsec_demux;
2620 ipsec_init.add_proto = ipsec_add_proto;
2621 ipsec_init.del_proto = ipsec_del_proto;
2622 ipsec_init.softc = pcb;
2623 ipsec_init.ioctl = ipsec_ioctl;
2624 ipsec_init.detach = ipsec_detached;
5ba3f43e
A
2625
2626#if IPSEC_NEXUS
cb323159
A
2627 /* We don't support kpipes without a netif */
2628 if (pcb->ipsec_kpipe_count && !pcb->ipsec_use_netif) {
2629 result = ENOTSUP;
2630 os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - kpipe requires netif: failed %d\n", result);
2631 ipsec_free_pcb(pcb, true);
2632 *unitinfo = NULL;
2633 return result;
2634 }
2635
2636 if (if_ipsec_debug != 0) {
2637 printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u "
2638 "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n",
2639 __func__,
2640 ipsec_init.name, ipsec_init.unit,
2641 pcb->ipsec_use_netif,
2642 pcb->ipsec_kpipe_count,
2643 pcb->ipsec_slot_size,
2644 pcb->ipsec_netif_ring_size,
2645 pcb->ipsec_kpipe_tx_ring_size,
2646 pcb->ipsec_kpipe_rx_ring_size);
2647 }
5c9f4661 2648 if (pcb->ipsec_use_netif) {
cb323159
A
2649 if (pcb->ipsec_kpipe_count) {
2650 result = ipsec_enable_channel(pcb, current_proc());
2651 if (result) {
2652 os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n",
2653 __func__, pcb->ipsec_if_xname);
2654 ipsec_free_pcb(pcb, true);
2655 *unitinfo = NULL;
2656 return result;
2657 }
2658 }
2659
5c9f4661
A
2660 result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
2661 if (result != 0) {
cb323159 2662 os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
5c9f4661
A
2663 ipsec_free_pcb(pcb, true);
2664 *unitinfo = NULL;
2665 return result;
2666 }
5ba3f43e 2667
cb323159 2668 result = ipsec_flowswitch_attach(pcb);
5c9f4661 2669 if (result != 0) {
cb323159
A
2670 os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n", result);
2671 // Do not call ipsec_free_pcb(). We will be attached already, and will be freed later
2672 // in ipsec_detached().
5c9f4661
A
2673 *unitinfo = NULL;
2674 return result;
2675 }
5ba3f43e 2676
5c9f4661
A
2677 /* Attach to bpf */
2678 bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
2679 } else
5ba3f43e 2680#endif // IPSEC_NEXUS
5c9f4661
A
2681 {
2682 result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
2683 if (result != 0) {
cb323159 2684 os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
5c9f4661
A
2685 ipsec_free_pcb(pcb, true);
2686 *unitinfo = NULL;
2687 return result;
2688 }
2689 ipsec_ifnet_set_attrs(pcb->ipsec_ifp);
2690
2691 /* Attach the interface */
2692 result = ifnet_attach(pcb->ipsec_ifp, NULL);
2693 if (result != 0) {
cb323159 2694 os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
5c9f4661
A
2695 ifnet_release(pcb->ipsec_ifp);
2696 ipsec_free_pcb(pcb, true);
2697 *unitinfo = NULL;
0a7de745 2698 return result;
5c9f4661 2699 }
5ba3f43e 2700
5c9f4661
A
2701 /* Attach to bpf */
2702 bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
2703 }
5ba3f43e 2704
cb323159
A
2705 /*
2706 * Mark the data path as ready.
2707 * If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected.
2708 */
2709 if (pcb->ipsec_kpipe_count == 0) {
2710 lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
2711 IPSEC_SET_DATA_PATH_READY(pcb);
2712 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
2713 }
2714
5ba3f43e
A
2715 /* The interface's resources are allocated, mark it as running */
2716 ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);
2717
0a7de745 2718 return 0;
39236c6e
A
2719}
2720
2721static errno_t
0a7de745
A
2722ipsec_detach_ip(ifnet_t interface,
2723 protocol_family_t protocol,
2724 socket_t pf_socket)
39236c6e
A
2725{
2726 errno_t result = EPROTONOSUPPORT;
0a7de745 2727
39236c6e
A
2728 /* Attempt a detach */
2729 if (protocol == PF_INET) {
0a7de745
A
2730 struct ifreq ifr;
2731
39236c6e
A
2732 bzero(&ifr, sizeof(ifr));
2733 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
0a7de745
A
2734 ifnet_name(interface), ifnet_unit(interface));
2735
39236c6e 2736 result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
0a7de745
A
2737 } else if (protocol == PF_INET6) {
2738 struct in6_ifreq ifr6;
2739
39236c6e
A
2740 bzero(&ifr6, sizeof(ifr6));
2741 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
0a7de745
A
2742 ifnet_name(interface), ifnet_unit(interface));
2743
39236c6e
A
2744 result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2745 }
0a7de745 2746
39236c6e
A
2747 return result;
2748}
2749
2750static void
0a7de745
A
2751ipsec_remove_address(ifnet_t interface,
2752 protocol_family_t protocol,
2753 ifaddr_t address,
2754 socket_t pf_socket)
39236c6e
A
2755{
2756 errno_t result = 0;
0a7de745 2757
39236c6e
A
2758 /* Attempt a detach */
2759 if (protocol == PF_INET) {
0a7de745
A
2760 struct ifreq ifr;
2761
39236c6e
A
2762 bzero(&ifr, sizeof(ifr));
2763 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
0a7de745 2764 ifnet_name(interface), ifnet_unit(interface));
39236c6e
A
2765 result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2766 if (result != 0) {
cb323159 2767 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed: %d", result);
0a7de745 2768 } else {
39236c6e
A
2769 result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2770 if (result != 0) {
cb323159 2771 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR failed: %d", result);
39236c6e
A
2772 }
2773 }
0a7de745
A
2774 } else if (protocol == PF_INET6) {
2775 struct in6_ifreq ifr6;
2776
39236c6e
A
2777 bzero(&ifr6, sizeof(ifr6));
2778 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
0a7de745 2779 ifnet_name(interface), ifnet_unit(interface));
39236c6e 2780 result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
0a7de745 2781 sizeof(ifr6.ifr_addr));
39236c6e 2782 if (result != 0) {
cb323159 2783 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed (v6): %d",
0a7de745
A
2784 result);
2785 } else {
39236c6e
A
2786 result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2787 if (result != 0) {
cb323159 2788 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
0a7de745 2789 result);
39236c6e
A
2790 }
2791 }
2792 }
2793}
2794
2795static void
0a7de745
A
2796ipsec_cleanup_family(ifnet_t interface,
2797 protocol_family_t protocol)
39236c6e 2798{
0a7de745
A
2799 errno_t result = 0;
2800 socket_t pf_socket = NULL;
2801 ifaddr_t *addresses = NULL;
2802 int i;
2803
39236c6e 2804 if (protocol != PF_INET && protocol != PF_INET6) {
cb323159 2805 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - invalid protocol family %d\n", protocol);
39236c6e
A
2806 return;
2807 }
0a7de745 2808
39236c6e
A
2809 /* Create a socket for removing addresses and detaching the protocol */
2810 result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2811 if (result != 0) {
0a7de745 2812 if (result != EAFNOSUPPORT) {
cb323159 2813 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - failed to create %s socket: %d\n",
0a7de745
A
2814 protocol == PF_INET ? "IP" : "IPv6", result);
2815 }
39236c6e
A
2816 goto cleanup;
2817 }
0a7de745 2818
39236c6e
A
2819 /* always set SS_PRIV, we want to close and detach regardless */
2820 sock_setpriv(pf_socket, 1);
0a7de745 2821
39236c6e
A
2822 result = ipsec_detach_ip(interface, protocol, pf_socket);
2823 if (result == 0 || result == ENXIO) {
2824 /* We are done! We either detached or weren't attached. */
2825 goto cleanup;
0a7de745 2826 } else if (result != EBUSY) {
39236c6e 2827 /* Uh, not really sure what happened here... */
cb323159 2828 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
39236c6e
A
2829 goto cleanup;
2830 }
0a7de745 2831
39236c6e
A
2832 /*
2833 * At this point, we received an EBUSY error. This means there are
2834 * addresses attached. We should detach them and then try again.
2835 */
2836 result = ifnet_get_address_list_family(interface, &addresses, protocol);
2837 if (result != 0) {
cb323159 2838 os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
0a7de745
A
2839 ifnet_name(interface), ifnet_unit(interface),
2840 protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
39236c6e
A
2841 goto cleanup;
2842 }
0a7de745 2843
39236c6e
A
2844 for (i = 0; addresses[i] != 0; i++) {
2845 ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
2846 }
2847 ifnet_free_address_list(addresses);
2848 addresses = NULL;
0a7de745 2849
39236c6e
A
2850 /*
2851 * The addresses should be gone, we should try the remove again.
2852 */
2853 result = ipsec_detach_ip(interface, protocol, pf_socket);
2854 if (result != 0 && result != ENXIO) {
cb323159 2855 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
39236c6e 2856 }
0a7de745 2857
39236c6e 2858cleanup:
0a7de745 2859 if (pf_socket != NULL) {
39236c6e 2860 sock_close(pf_socket);
0a7de745
A
2861 }
2862
2863 if (addresses != NULL) {
39236c6e 2864 ifnet_free_address_list(addresses);
0a7de745 2865 }
39236c6e
A
2866}
2867
2868static errno_t
0a7de745
A
2869ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref,
2870 __unused u_int32_t unit,
2871 void *unitinfo)
39236c6e 2872{
5ba3f43e
A
2873 struct ipsec_pcb *pcb = unitinfo;
2874 ifnet_t ifp = NULL;
2875 errno_t result = 0;
39037602 2876
5ba3f43e 2877 if (pcb == NULL) {
39037602 2878 return EINVAL;
5ba3f43e
A
2879 }
2880
cb323159
A
2881 /* Wait until all threads in the data paths are done. */
2882 ipsec_wait_data_move_drain(pcb);
2883
5ba3f43e
A
2884#if IPSEC_NEXUS
2885 // Tell the nexus to stop all rings
2886 if (pcb->ipsec_netif_nexus != NULL) {
2887 kern_nexus_stop(pcb->ipsec_netif_nexus);
2888 }
2889#endif // IPSEC_NEXUS
2890
2891 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2892
2893#if IPSEC_NEXUS
cb323159
A
2894 if (if_ipsec_debug != 0) {
2895 printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n",
2896 pcb->ipsec_if_xname, pcb->ipsec_unique_name);
2897 }
2898
2899 struct ipsec_detached_channels dc;
2900 ipsec_detach_channels(pcb, &dc);
5ba3f43e 2901#endif // IPSEC_NEXUS
39037602 2902
39236c6e 2903 pcb->ipsec_ctlref = NULL;
5ba3f43e 2904
5c9f4661
A
2905 ifp = pcb->ipsec_ifp;
2906 if (ifp != NULL) {
2907#if IPSEC_NEXUS
2908 if (pcb->ipsec_netif_nexus != NULL) {
2909 /*
2910 * Quiesce the interface and flush any pending outbound packets.
2911 */
2912 if_down(ifp);
2913
2914 /* Increment refcnt, but detach interface */
2915 ifnet_incr_iorefcnt(ifp);
2916 if ((result = ifnet_detach(ifp)) != 0) {
2917 panic("ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
2918 /* NOT REACHED */
2919 }
5ba3f43e 2920
5c9f4661
A
2921 /*
2922 * We want to do everything in our power to ensure that the interface
2923 * really goes away when the socket is closed. We must remove IP/IPv6
2924 * addresses and detach the protocols. Finally, we can remove and
2925 * release the interface.
2926 */
2927 key_delsp_for_ipsec_if(ifp);
5ba3f43e 2928
5c9f4661
A
2929 ipsec_cleanup_family(ifp, AF_INET);
2930 ipsec_cleanup_family(ifp, AF_INET6);
5ba3f43e 2931
5c9f4661
A
2932 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2933
cb323159
A
2934 ipsec_free_channels(&dc);
2935
a39ff7e2 2936 ipsec_nexus_detach(pcb);
5c9f4661
A
2937
2938 /* Decrement refcnt to finish detaching and freeing */
2939 ifnet_decr_iorefcnt(ifp);
2940 } else
2941#endif // IPSEC_NEXUS
2942 {
2943 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
5ba3f43e
A
2944
2945#if IPSEC_NEXUS
cb323159 2946 ipsec_free_channels(&dc);
5ba3f43e
A
2947#endif // IPSEC_NEXUS
2948
5c9f4661
A
2949 /*
2950 * We want to do everything in our power to ensure that the interface
2951 * really goes away when the socket is closed. We must remove IP/IPv6
2952 * addresses and detach the protocols. Finally, we can remove and
2953 * release the interface.
2954 */
2955 key_delsp_for_ipsec_if(ifp);
2956
2957 ipsec_cleanup_family(ifp, AF_INET);
2958 ipsec_cleanup_family(ifp, AF_INET6);
2959
2960 /*
2961 * Detach now; ipsec_detach() will be called asynchronously once
2962 * the I/O reference count drops to 0. There we will invoke
2963 * ifnet_release().
2964 */
2965 if ((result = ifnet_detach(ifp)) != 0) {
cb323159 2966 os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
5c9f4661
A
2967 }
2968 }
2969 } else {
2970 // Bound, but not connected
2971 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2972 ipsec_free_pcb(pcb, false);
2973 }
0a7de745 2974
39236c6e
A
2975 return 0;
2976}
2977
2978static errno_t
0a7de745
A
2979ipsec_ctl_send(__unused kern_ctl_ref kctlref,
2980 __unused u_int32_t unit,
2981 __unused void *unitinfo,
2982 mbuf_t m,
2983 __unused int flags)
39236c6e 2984{
0a7de745
A
2985 /* Receive messages from the control socket. Currently unused. */
2986 mbuf_freem(m);
39236c6e
A
2987 return 0;
2988}
2989
2990static errno_t
0a7de745
A
2991ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,
2992 __unused u_int32_t unit,
2993 void *unitinfo,
2994 int opt,
2995 void *data,
2996 size_t len)
39236c6e 2997{
0a7de745
A
2998 struct ipsec_pcb *pcb = unitinfo;
2999 errno_t result = 0;
3000
39236c6e
A
3001 /* check for privileges for privileged options */
3002 switch (opt) {
0a7de745
A
3003 case IPSEC_OPT_FLAGS:
3004 case IPSEC_OPT_EXT_IFDATA_STATS:
3005 case IPSEC_OPT_SET_DELEGATE_INTERFACE:
3006 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
3007 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3008 return EPERM;
3009 }
3010 break;
39236c6e 3011 }
0a7de745 3012
39236c6e 3013 switch (opt) {
cb323159 3014 case IPSEC_OPT_FLAGS: {
0a7de745
A
3015 if (len != sizeof(u_int32_t)) {
3016 result = EMSGSIZE;
3017 } else {
cb323159 3018 pcb->ipsec_external_flags = *(u_int32_t *)data;
0a7de745
A
3019 }
3020 break;
cb323159 3021 }
0a7de745 3022
cb323159 3023 case IPSEC_OPT_EXT_IFDATA_STATS: {
0a7de745
A
3024 if (len != sizeof(int)) {
3025 result = EMSGSIZE;
39236c6e 3026 break;
0a7de745
A
3027 }
3028 if (pcb->ipsec_ifp == NULL) {
3029 // Only can set after connecting
3030 result = EINVAL;
39236c6e 3031 break;
0a7de745
A
3032 }
3033 pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
3034 break;
cb323159 3035 }
0a7de745
A
3036
3037 case IPSEC_OPT_INC_IFDATA_STATS_IN:
3038 case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
3039 struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
3040
3041 if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
3042 result = EINVAL;
39236c6e
A
3043 break;
3044 }
0a7de745
A
3045 if (pcb->ipsec_ifp == NULL) {
3046 // Only can set after connecting
3047 result = EINVAL;
39236c6e
A
3048 break;
3049 }
0a7de745
A
3050 if (!pcb->ipsec_ext_ifdata_stats) {
3051 result = EINVAL;
fe8ab488
A
3052 break;
3053 }
0a7de745
A
3054 if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) {
3055 ifnet_stat_increment_in(pcb->ipsec_ifp, utsp->utsp_packets,
3056 utsp->utsp_bytes, utsp->utsp_errors);
3057 } else {
3058 ifnet_stat_increment_out(pcb->ipsec_ifp, utsp->utsp_packets,
3059 utsp->utsp_bytes, utsp->utsp_errors);
3060 }
3061 break;
3062 }
5ba3f43e 3063
0a7de745 3064 case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
cb323159
A
3065 ifnet_t del_ifp = NULL;
3066 char name[IFNAMSIZ];
0a7de745
A
3067
3068 if (len > IFNAMSIZ - 1) {
3069 result = EMSGSIZE;
5ba3f43e
A
3070 break;
3071 }
0a7de745
A
3072 if (pcb->ipsec_ifp == NULL) {
3073 // Only can set after connecting
3074 result = EINVAL;
3075 break;
3076 }
cb323159 3077 if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */
0a7de745
A
3078 bcopy(data, name, len);
3079 name[len] = 0;
3080 result = ifnet_find_by_name(name, &del_ifp);
3081 }
3082 if (result == 0) {
cb323159 3083 os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
0a7de745
A
3084 __func__, pcb->ipsec_ifp->if_xname,
3085 del_ifp ? del_ifp->if_xname : "NULL");
5ba3f43e 3086
0a7de745
A
3087 result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
3088 if (del_ifp) {
3089 ifnet_release(del_ifp);
a39ff7e2 3090 }
0a7de745
A
3091 }
3092 break;
3093 }
a39ff7e2 3094
0a7de745
A
3095 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3096 if (len != sizeof(int)) {
3097 result = EMSGSIZE;
3098 break;
3099 }
3100 if (pcb->ipsec_ifp == NULL) {
3101 // Only can set after connecting
3102 result = EINVAL;
5ba3f43e
A
3103 break;
3104 }
0a7de745
A
3105 mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
3106 if (output_service_class == MBUF_SC_UNSPEC) {
3107 pcb->ipsec_output_service_class = MBUF_SC_OAM;
3108 } else {
3109 pcb->ipsec_output_service_class = output_service_class;
3110 }
cb323159 3111 os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
0a7de745
A
3112 __func__, pcb->ipsec_ifp->if_xname,
3113 pcb->ipsec_output_service_class);
3114 break;
3115 }
5ba3f43e 3116
0a7de745
A
3117#if IPSEC_NEXUS
3118 case IPSEC_OPT_ENABLE_CHANNEL: {
3119 if (len != sizeof(int)) {
3120 result = EMSGSIZE;
5ba3f43e
A
3121 break;
3122 }
cb323159
A
3123 if (pcb->ipsec_ifp != NULL) {
3124 // Only can set before connecting
0a7de745 3125 result = EINVAL;
5c9f4661
A
3126 break;
3127 }
cb323159
A
3128 if ((*(int *)data) != 0 &&
3129 (*(int *)data) != 1 &&
3130 (*(int *)data) != IPSEC_IF_WMM_RING_COUNT) {
3131 result = EINVAL;
3132 break;
0a7de745 3133 }
cb323159
A
3134 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3135 pcb->ipsec_kpipe_count = *(int *)data;
3136 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3137 break;
3138 }
3139
3140 case IPSEC_OPT_CHANNEL_BIND_PID: {
3141 if (len != sizeof(pid_t)) {
3142 result = EMSGSIZE;
3143 break;
3144 }
3145 if (pcb->ipsec_ifp != NULL) {
3146 // Only can set before connecting
3147 result = EINVAL;
3148 break;
3149 }
3150 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3151 pcb->ipsec_kpipe_pid = *(pid_t *)data;
3152 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
0a7de745
A
3153 break;
3154 }
3155
3156 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3157 if (len != sizeof(int)) {
3158 result = EMSGSIZE;
5c9f4661
A
3159 break;
3160 }
0a7de745
A
3161 if (pcb->ipsec_ifp == NULL) {
3162 // Only can set after connecting
3163 result = EINVAL;
5c9f4661
A
3164 break;
3165 }
cb323159 3166 if (!if_is_fsw_transport_netagent_enabled()) {
0a7de745 3167 result = ENOTSUP;
5c9f4661
A
3168 break;
3169 }
cb323159 3170 if (uuid_is_null(pcb->ipsec_nx.fsw_agent)) {
0a7de745 3171 result = ENOENT;
5c9f4661
A
3172 break;
3173 }
3174
cb323159
A
3175 uint32_t flags = netagent_get_flags(pcb->ipsec_nx.fsw_agent);
3176
0a7de745 3177 if (*(int *)data) {
cb323159
A
3178 flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
3179 NETAGENT_FLAG_NEXUS_LISTENER);
3180 result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
0a7de745
A
3181 pcb->ipsec_needs_netagent = true;
3182 } else {
3183 pcb->ipsec_needs_netagent = false;
cb323159
A
3184 flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
3185 NETAGENT_FLAG_NEXUS_LISTENER);
3186 result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
0a7de745
A
3187 }
3188 break;
3189 }
3190
3191 case IPSEC_OPT_INPUT_FRAG_SIZE: {
3192 if (len != sizeof(u_int32_t)) {
3193 result = EMSGSIZE;
3194 break;
3195 }
3196 u_int32_t input_frag_size = *(u_int32_t *)data;
3197 if (input_frag_size <= sizeof(struct ip6_hdr)) {
3198 pcb->ipsec_frag_size_set = FALSE;
3199 pcb->ipsec_input_frag_size = 0;
3200 } else {
0a7de745
A
3201 pcb->ipsec_frag_size_set = TRUE;
3202 pcb->ipsec_input_frag_size = input_frag_size;
3203 }
3204 break;
3205 }
3206 case IPSEC_OPT_ENABLE_NETIF: {
3207 if (len != sizeof(int)) {
3208 result = EMSGSIZE;
3209 break;
3210 }
3211 if (pcb->ipsec_ifp != NULL) {
3212 // Only can set before connecting
3213 result = EINVAL;
3214 break;
3215 }
3216 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3217 pcb->ipsec_use_netif = !!(*(int *)data);
3218 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3219 break;
3220 }
3221 case IPSEC_OPT_SLOT_SIZE: {
3222 if (len != sizeof(u_int32_t)) {
3223 result = EMSGSIZE;
3224 break;
3225 }
3226 if (pcb->ipsec_ifp != NULL) {
3227 // Only can set before connecting
3228 result = EINVAL;
39236c6e 3229 break;
0a7de745
A
3230 }
3231 u_int32_t slot_size = *(u_int32_t *)data;
3232 if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
3233 slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
3234 return EINVAL;
3235 }
3236 pcb->ipsec_slot_size = slot_size;
cb323159
A
3237 if (if_ipsec_debug != 0) {
3238 printf("%s: IPSEC_OPT_SLOT_SIZE %u\n", __func__, slot_size);
3239 }
0a7de745 3240 break;
39236c6e 3241 }
0a7de745
A
3242 case IPSEC_OPT_NETIF_RING_SIZE: {
3243 if (len != sizeof(u_int32_t)) {
3244 result = EMSGSIZE;
3245 break;
3246 }
3247 if (pcb->ipsec_ifp != NULL) {
3248 // Only can set before connecting
3249 result = EINVAL;
3250 break;
3251 }
3252 u_int32_t ring_size = *(u_int32_t *)data;
3253 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3254 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3255 return EINVAL;
3256 }
3257 pcb->ipsec_netif_ring_size = ring_size;
cb323159
A
3258 if (if_ipsec_debug != 0) {
3259 printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n", __func__, ring_size);
3260 }
0a7de745
A
3261 break;
3262 }
3263 case IPSEC_OPT_TX_FSW_RING_SIZE: {
3264 if (len != sizeof(u_int32_t)) {
3265 result = EMSGSIZE;
3266 break;
3267 }
3268 if (pcb->ipsec_ifp != NULL) {
3269 // Only can set before connecting
3270 result = EINVAL;
3271 break;
3272 }
3273 u_int32_t ring_size = *(u_int32_t *)data;
3274 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3275 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3276 return EINVAL;
3277 }
3278 pcb->ipsec_tx_fsw_ring_size = ring_size;
cb323159
A
3279 if (if_ipsec_debug != 0) {
3280 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3281 }
0a7de745
A
3282 break;
3283 }
3284 case IPSEC_OPT_RX_FSW_RING_SIZE: {
3285 if (len != sizeof(u_int32_t)) {
3286 result = EMSGSIZE;
3287 break;
3288 }
3289 if (pcb->ipsec_ifp != NULL) {
3291 // Can only be set before connecting
3291 result = EINVAL;
3292 break;
3293 }
3294 u_int32_t ring_size = *(u_int32_t *)data;
3295 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3296 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3297 return EINVAL;
3298 }
3299 pcb->ipsec_rx_fsw_ring_size = ring_size;
cb323159
A
3300 if (if_ipsec_debug != 0) {
3301 printf("%s: IPSEC_OPT_RX_FSW_RING_SIZE %u\n", __func__, ring_size);
3302 }
3303 break;
3304 }
3305 case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3306 if (len != sizeof(u_int32_t)) {
3307 result = EMSGSIZE;
3308 break;
3309 }
3310 if (pcb->ipsec_ifp != NULL) {
3312 // Can only be set before connecting
3312 result = EINVAL;
3313 break;
3314 }
3315 u_int32_t ring_size = *(u_int32_t *)data;
3316 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3317 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3318 return EINVAL;
3319 }
3320 pcb->ipsec_kpipe_tx_ring_size = ring_size;
3321 if (if_ipsec_debug != 0) {
3322 printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n", __func__, ring_size);
3323 }
3324 break;
3325 }
3326 case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3327 if (len != sizeof(u_int32_t)) {
3328 result = EMSGSIZE;
3329 break;
3330 }
3331 if (pcb->ipsec_ifp != NULL) {
3333 // Can only be set before connecting
3333 result = EINVAL;
3334 break;
3335 }
3336 u_int32_t ring_size = *(u_int32_t *)data;
3337 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3338 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3339 return EINVAL;
3340 }
3341 pcb->ipsec_kpipe_rx_ring_size = ring_size;
3342 if (if_ipsec_debug != 0) {
3343 printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n", __func__, ring_size);
3344 }
0a7de745
A
3345 break;
3346 }
3347
3348#endif // IPSEC_NEXUS
3349
cb323159 3350 default: {
0a7de745
A
3351 result = ENOPROTOOPT;
3352 break;
3353 }
cb323159 3354 }
0a7de745 3355
39236c6e
A
3356 return result;
3357}
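/*
 * Illustrative userspace sketch (not part of this translation unit): how a
 * control client might set one of the "before connecting" options handled
 * above.  This is a hedged example; it assumes the standard kernel-control
 * socket sequence and the IPSEC_CONTROL_NAME / IPSEC_OPT_* constants from
 * <net/if_ipsec.h>, and that bind() is issued before connect() so that
 * pcb->ipsec_ifp is still NULL when the option arrives.
 *
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *	struct ctl_info info;
 *	bzero(&info, sizeof(info));
 *	strlcpy(info.ctl_name, IPSEC_CONTROL_NAME, sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);
 *
 *	struct sockaddr_ctl addr;
 *	bzero(&addr, sizeof(addr));
 *	addr.sc_len = sizeof(addr);
 *	addr.sc_family = AF_SYSTEM;
 *	addr.ss_sysaddr = AF_SYS_CONTROL;
 *	addr.sc_id = info.ctl_id;
 *	addr.sc_unit = 0;			// let the kernel pick the unit
 *
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *
 *	u_int32_t slot_size = 4096;		// hypothetical value within the min/max bounds
 *	setsockopt(fd, SYSPROTO_CONTROL, IPSEC_OPT_SLOT_SIZE,
 *	    &slot_size, sizeof(slot_size));
 *
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));	// creates ipsecN
 */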
3358
3359static errno_t
5ba3f43e 3360ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
0a7de745
A
3361 __unused u_int32_t unit,
3362 void *unitinfo,
3363 int opt,
3364 void *data,
3365 size_t *len)
39236c6e 3366{
5ba3f43e
A
3367 struct ipsec_pcb *pcb = unitinfo;
3368 errno_t result = 0;
0a7de745 3369
39236c6e 3370 switch (opt) {
0a7de745
A
3371 case IPSEC_OPT_FLAGS: {
3372 if (*len != sizeof(u_int32_t)) {
3373 result = EMSGSIZE;
3374 } else {
cb323159 3375 *(u_int32_t *)data = pcb->ipsec_external_flags;
5ba3f43e 3376 }
0a7de745
A
3377 break;
3378 }
3379
3380 case IPSEC_OPT_EXT_IFDATA_STATS: {
3381 if (*len != sizeof(int)) {
3382 result = EMSGSIZE;
3383 } else {
3384 *(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
5ba3f43e 3385 }
0a7de745
A
3386 break;
3387 }
3388
3389 case IPSEC_OPT_IFNAME: {
3390 if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) {
3391 result = EMSGSIZE;
3392 } else {
3393 if (pcb->ipsec_ifp == NULL) {
3394 // Can only be read after connecting
3395 result = EINVAL;
3396 break;
5ba3f43e 3397 }
0a7de745 3398 *len = snprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
5ba3f43e 3399 }
0a7de745
A
3400 break;
3401 }
3402
3403 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3404 if (*len != sizeof(int)) {
3405 result = EMSGSIZE;
3406 } else {
3407 *(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
fe8ab488 3408 }
0a7de745
A
3409 break;
3410 }
5ba3f43e
A
3411
3412#if IPSEC_NEXUS
a39ff7e2 3413
0a7de745
A
3414 case IPSEC_OPT_ENABLE_CHANNEL: {
3415 if (*len != sizeof(int)) {
3416 result = EMSGSIZE;
3417 } else {
3418 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
cb323159
A
3419 *(int *)data = pcb->ipsec_kpipe_count;
3420 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3421 }
3422 break;
3423 }
3424
3425 case IPSEC_OPT_CHANNEL_BIND_PID: {
3426 if (*len != sizeof(pid_t)) {
3427 result = EMSGSIZE;
3428 } else {
3429 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3430 *(pid_t *)data = pcb->ipsec_kpipe_pid;
0a7de745 3431 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
a39ff7e2 3432 }
0a7de745
A
3433 break;
3434 }
a39ff7e2 3435
0a7de745
A
3436 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3437 if (*len != sizeof(int)) {
3438 result = EMSGSIZE;
3439 } else {
cb323159 3440 *(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.fsw_agent);
a39ff7e2 3441 }
0a7de745
A
3442 break;
3443 }
a39ff7e2 3444
0a7de745
A
3445 case IPSEC_OPT_ENABLE_NETIF: {
3446 if (*len != sizeof(int)) {
3447 result = EMSGSIZE;
3448 } else {
5ba3f43e 3449 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
0a7de745 3450 *(int *)data = !!pcb->ipsec_use_netif;
5ba3f43e 3451 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
5ba3f43e 3452 }
0a7de745
A
3453 break;
3454 }
5ba3f43e 3455
0a7de745
A
3456 case IPSEC_OPT_GET_CHANNEL_UUID: {
3457 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
cb323159 3458 if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
0a7de745 3459 result = ENXIO;
cb323159 3460 } else if (*len != sizeof(uuid_t) * pcb->ipsec_kpipe_count) {
0a7de745
A
3461 result = EMSGSIZE;
3462 } else {
cb323159
A
3463 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
3464 uuid_copy(((uuid_t *)data)[i], pcb->ipsec_kpipe_uuid[i]);
3465 }
5ba3f43e 3466 }
0a7de745
A
3467 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3468 break;
3469 }
3470
3471 case IPSEC_OPT_INPUT_FRAG_SIZE: {
3472 if (*len != sizeof(u_int32_t)) {
3473 result = EMSGSIZE;
3474 } else {
3475 *(u_int32_t *)data = pcb->ipsec_input_frag_size;
5c9f4661 3476 }
0a7de745
A
3477 break;
3478 }
3479 case IPSEC_OPT_SLOT_SIZE: {
3480 if (*len != sizeof(u_int32_t)) {
3481 result = EMSGSIZE;
3482 } else {
3483 *(u_int32_t *)data = pcb->ipsec_slot_size;
5c9f4661 3484 }
0a7de745
A
3485 break;
3486 }
3487 case IPSEC_OPT_NETIF_RING_SIZE: {
3488 if (*len != sizeof(u_int32_t)) {
3489 result = EMSGSIZE;
3490 } else {
3491 *(u_int32_t *)data = pcb->ipsec_netif_ring_size;
5c9f4661 3492 }
0a7de745
A
3493 break;
3494 }
3495 case IPSEC_OPT_TX_FSW_RING_SIZE: {
3496 if (*len != sizeof(u_int32_t)) {
3497 result = EMSGSIZE;
3498 } else {
3499 *(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
3500 }
3501 break;
3502 }
3503 case IPSEC_OPT_RX_FSW_RING_SIZE: {
3504 if (*len != sizeof(u_int32_t)) {
3505 result = EMSGSIZE;
3506 } else {
3507 *(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
5c9f4661 3508 }
0a7de745
A
3509 break;
3510 }
cb323159
A
3511 case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3512 if (*len != sizeof(u_int32_t)) {
3513 result = EMSGSIZE;
3514 } else {
3515 *(u_int32_t *)data = pcb->ipsec_kpipe_tx_ring_size;
3516 }
3517 break;
3518 }
3519 case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3520 if (*len != sizeof(u_int32_t)) {
3521 result = EMSGSIZE;
3522 } else {
3523 *(u_int32_t *)data = pcb->ipsec_kpipe_rx_ring_size;
3524 }
3525 break;
3526 }
5c9f4661 3527
5ba3f43e
A
3528#endif // IPSEC_NEXUS
3529
0a7de745
A
3530 default: {
3531 result = ENOPROTOOPT;
3532 break;
3533 }
39236c6e 3534 }
0a7de745 3535
39236c6e
A
3536 return result;
3537}
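/*
 * Companion sketch (assumption: continues the control socket from the
 * setsockopt example above): once connect() has created the interface, the
 * generated name can be read back with IPSEC_OPT_IFNAME, matching the length
 * checks in ipsec_ctl_getopt().
 *
 *	char ifname[IFXNAMSIZ];
 *	socklen_t ifname_len = sizeof(ifname);
 *	if (getsockopt(fd, SYSPROTO_CONTROL, IPSEC_OPT_IFNAME,
 *	    ifname, &ifname_len) == 0) {
 *		printf("created %s\n", ifname);
 *	}
 */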
3538
3539/* Network Interface functions */
3540static errno_t
5ba3f43e 3541ipsec_output(ifnet_t interface,
0a7de745 3542 mbuf_t data)
39236c6e 3543{
5ba3f43e 3544 struct ipsec_pcb *pcb = ifnet_softc(interface);
0a7de745
A
3545 struct ipsec_output_state ipsec_state;
3546 struct route ro;
3547 struct route_in6 ro6;
3548 int length;
3549 struct ip *ip;
3550 struct ip6_hdr *ip6;
3551 struct ip_out_args ipoa;
3552 struct ip6_out_args ip6oa;
3553 int error = 0;
3554 u_int ip_version = 0;
3555 int flags = 0;
3556 struct flowadv *adv = NULL;
3557
fe8ab488
A
3558 // Make sure this packet isn't looping through the interface
3559 if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
5ba3f43e 3560 error = EINVAL;
fe8ab488
A
3561 goto ipsec_output_err;
3562 }
0a7de745 3563
fe8ab488
A
3564 // Mark the interface so NECP can evaluate tunnel policy
3565 necp_mark_packet_from_interface(data, interface);
0a7de745
A
3566
3567 ip = mtod(data, struct ip *);
3568 ip_version = ip->ip_v;
3569
3570 switch (ip_version) {
3571 case 4: {
5c9f4661 3572#if IPSEC_NEXUS
0a7de745 3573 if (!pcb->ipsec_use_netif)
5c9f4661 3574#endif // IPSEC_NEXUS
0a7de745
A
3575 {
3576 int af = AF_INET;
3577 bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
3578 }
3579
3580 /* Apply encryption */
3581 memset(&ipsec_state, 0, sizeof(ipsec_state));
3582 ipsec_state.m = data;
3583 ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
3584 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
3585
3586 error = ipsec4_interface_output(&ipsec_state, interface);
3587 /* Tunneled in IPv6 - packet is gone */
3588 if (error == 0 && ipsec_state.tunneled == 6) {
3589 goto done;
3590 }
3591
3592 data = ipsec_state.m;
3593 if (error || data == NULL) {
3594 if (error) {
cb323159 3595 os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec4_output error %d.\n", error);
5c9f4661 3596 }
0a7de745
A
3597 goto ipsec_output_err;
3598 }
5c9f4661 3599
0a7de745
A
3600 /* Set traffic class, set flow */
3601 m_set_service_class(data, pcb->ipsec_output_service_class);
3602 data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
3603 data->m_pkthdr.pkt_flowid = interface->if_flowhash;
3604 data->m_pkthdr.pkt_proto = ip->ip_p;
3605 data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
3606
3607 /* Flip endian-ness for ip_output */
3608 ip = mtod(data, struct ip *);
3609 NTOHS(ip->ip_len);
3610 NTOHS(ip->ip_off);
3611
3612 /* Increment statistics */
3613 length = mbuf_pkthdr_len(data);
3614 ifnet_stat_increment_out(interface, 1, length, 0);
3615
3616 /* Send to ip_output */
3617 memset(&ro, 0, sizeof(ro));
3618
3619 flags = (IP_OUTARGS | /* Passing out args to specify interface */
3620 IP_NOIPSEC); /* To ensure the packet doesn't go through ipsec twice */
3621
3622 memset(&ipoa, 0, sizeof(ipoa));
3623 ipoa.ipoa_flowadv.code = 0;
3624 ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
3625 if (ipsec_state.outgoing_if) {
3626 ipoa.ipoa_boundif = ipsec_state.outgoing_if;
3627 ipoa.ipoa_flags |= IPOAF_BOUND_IF;
5ba3f43e 3628 }
0a7de745
A
3629 ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);
3630
3631 adv = &ipoa.ipoa_flowadv;
3632
3633 (void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
3634 data = NULL;
3635
3636 if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
3637 error = ENOBUFS;
3638 ifnet_disable_output(interface);
3639 }
3640
3641 goto done;
3642 }
3643 case 6: {
5c9f4661 3644#if IPSEC_NEXUS
0a7de745 3645 if (!pcb->ipsec_use_netif)
5c9f4661 3646#endif // IPSEC_NEXUS
0a7de745
A
3647 {
3648 int af = AF_INET6;
3649 bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
3650 }
5c9f4661 3651
0a7de745
A
3652 data = ipsec6_splithdr(data);
3653 if (data == NULL) {
cb323159 3654 os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_splithdr returned NULL\n");
0a7de745
A
3655 goto ipsec_output_err;
3656 }
3e170ce0 3657
0a7de745
A
3658 ip6 = mtod(data, struct ip6_hdr *);
3659
3660 memset(&ipsec_state, 0, sizeof(ipsec_state));
3661 ipsec_state.m = data;
3662 ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
3663 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
3664
3665 error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
3666 if (error == 0 && ipsec_state.tunneled == 4) { /* tunneled in IPv4 - packet is gone */
3667 goto done;
3668 }
3669 data = ipsec_state.m;
3670 if (error || data == NULL) {
3671 if (error) {
cb323159 3672 os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_output error %d\n", error);
5ba3f43e 3673 }
0a7de745 3674 goto ipsec_output_err;
5ba3f43e 3675 }
0a7de745
A
3676
3677 /* Set traffic class, set flow */
3678 m_set_service_class(data, pcb->ipsec_output_service_class);
3679 data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
3680 data->m_pkthdr.pkt_flowid = interface->if_flowhash;
3681 data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
3682 data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
3683
3684 /* Increment statistics */
3685 length = mbuf_pkthdr_len(data);
3686 ifnet_stat_increment_out(interface, 1, length, 0);
3687
3688 /* Send to ip6_output */
3689 memset(&ro6, 0, sizeof(ro6));
3690
3691 flags = IPV6_OUTARGS;
3692
3693 memset(&ip6oa, 0, sizeof(ip6oa));
3694 ip6oa.ip6oa_flowadv.code = 0;
3695 ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
3696 if (ipsec_state.outgoing_if) {
3697 ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
3698 ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
5ba3f43e 3699 }
0a7de745
A
3700 ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);
3701
3702 adv = &ip6oa.ip6oa_flowadv;
3703
3704 (void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
3705 data = NULL;
3706
3707 if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
3708 error = ENOBUFS;
3709 ifnet_disable_output(interface);
3710 }
3711
3712 goto done;
3713 }
3714 default: {
cb323159 3715 os_log_error(OS_LOG_DEFAULT, "ipsec_output: Received unknown packet version %d.\n", ip_version);
0a7de745
A
3716 error = EINVAL;
3717 goto ipsec_output_err;
3718 }
3719 }
3720
39236c6e 3721done:
0a7de745
A
3722 return error;
3723
39236c6e 3724ipsec_output_err:
0a7de745
A
3725 if (data) {
3726 mbuf_freem(data);
3727 }
39236c6e
A
3728 goto done;
3729}
3730
3731static void
0a7de745 3732ipsec_start(ifnet_t interface)
39236c6e 3733{
fe8ab488 3734 mbuf_t data;
5ba3f43e 3735 struct ipsec_pcb *pcb = ifnet_softc(interface);
fe8ab488 3736
5ba3f43e 3737 VERIFY(pcb != NULL);
fe8ab488 3738 for (;;) {
0a7de745 3739 if (ifnet_dequeue(interface, &data) != 0) {
fe8ab488 3740 break;
0a7de745
A
3741 }
3742 if (ipsec_output(interface, data) != 0) {
fe8ab488 3743 break;
0a7de745 3744 }
fe8ab488 3745 }
39236c6e
A
3746}
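/*
 * Note on flow control (explanatory, not new behavior): when ipsec_output()
 * observes FADV_FLOW_CONTROLLED or FADV_SUSPENDED it calls
 * ifnet_disable_output() and returns ENOBUFS, which breaks the dequeue loop
 * above; output resumes once the flow advisory clears, output is re-enabled,
 * and the start callback is scheduled again.
 */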
3747
3748/* Network Interface functions */
3749static errno_t
0a7de745
A
3750ipsec_demux(__unused ifnet_t interface,
3751 mbuf_t data,
3752 __unused char *frame_header,
3753 protocol_family_t *protocol)
39236c6e 3754{
0a7de745
A
3755 struct ip *ip;
3756 u_int ip_version;
3757
39236c6e
A
3758 while (data != NULL && mbuf_len(data) < 1) {
3759 data = mbuf_next(data);
3760 }
0a7de745
A
3761
3762 if (data == NULL) {
39236c6e 3763 return ENOENT;
39236c6e 3764 }
0a7de745
A
3765
3766 ip = mtod(data, struct ip *);
3767 ip_version = ip->ip_v;
3768
3769 switch (ip_version) {
3770 case 4:
3771 *protocol = PF_INET;
3772 return 0;
3773 case 6:
3774 *protocol = PF_INET6;
3775 return 0;
3776 default:
3777 break;
3778 }
3779
39236c6e
A
3780 return 0;
3781}
3782
3783static errno_t
0a7de745
A
3784ipsec_add_proto(__unused ifnet_t interface,
3785 protocol_family_t protocol,
3786 __unused const struct ifnet_demux_desc *demux_array,
3787 __unused u_int32_t demux_count)
39236c6e 3788{
0a7de745
A
3789 switch (protocol) {
3790 case PF_INET:
3791 return 0;
3792 case PF_INET6:
3793 return 0;
3794 default:
3795 break;
39236c6e 3796 }
0a7de745 3797
39236c6e
A
3798 return ENOPROTOOPT;
3799}
3800
3801static errno_t
0a7de745
A
3802ipsec_del_proto(__unused ifnet_t interface,
3803 __unused protocol_family_t protocol)
39236c6e
A
3804{
3805 return 0;
3806}
3807
3808static errno_t
5ba3f43e 3809ipsec_ioctl(ifnet_t interface,
0a7de745
A
3810 u_long command,
3811 void *data)
39236c6e 3812{
d9a64523
A
3813#if IPSEC_NEXUS
3814 struct ipsec_pcb *pcb = ifnet_softc(interface);
3815#endif
0a7de745
A
3816 errno_t result = 0;
3817
3818 switch (command) {
3819 case SIOCSIFMTU: {
5ba3f43e 3820#if IPSEC_NEXUS
0a7de745
A
3821 if (pcb->ipsec_use_netif) {
3822 // Make sure we can fit packets in the channel buffers
3823 if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
3824 result = EINVAL;
3825 } else {
3826 ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
5c9f4661 3827 }
0a7de745
A
3828 } else
3829#endif // IPSEC_NEXUS
3830 {
3831 ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
5c9f4661 3832 }
0a7de745 3833 break;
39236c6e 3834 }
0a7de745
A
3835
3836 case SIOCSIFFLAGS:
3837 /* ifioctl() takes care of it */
3838 break;
3839
cb323159
A
3840 case SIOCSIFSUBFAMILY: {
3841 uint32_t subfamily;
3842
3843 subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily;
3844 switch (subfamily) {
3845 case IFRTYPE_SUBFAMILY_BLUETOOTH:
3846 interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH;
3847 break;
3848 case IFRTYPE_SUBFAMILY_WIFI:
3849 interface->if_subfamily = IFNET_SUBFAMILY_WIFI;
3850 break;
3851 case IFRTYPE_SUBFAMILY_QUICKRELAY:
3852 interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY;
3853 break;
3854 case IFRTYPE_SUBFAMILY_DEFAULT:
3855 interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT;
3856 break;
3857 default:
3858 result = EINVAL;
3859 break;
3860 }
3861 break;
3862 }
3863
0a7de745
A
3864 default:
3865 result = EOPNOTSUPP;
3866 }
3867
39236c6e
A
3868 return result;
3869}
3870
3871static void
5ba3f43e 3872ipsec_detached(ifnet_t interface)
39236c6e 3873{
5ba3f43e 3874 struct ipsec_pcb *pcb = ifnet_softc(interface);
cb323159 3875
5ba3f43e 3876 (void)ifnet_release(interface);
5c9f4661 3877 ipsec_free_pcb(pcb, true);
39236c6e
A
3878}
3879
3880/* Protocol Handlers */
3881
3882static errno_t
fe8ab488 3883ipsec_proto_input(ifnet_t interface,
0a7de745
A
3884 protocol_family_t protocol,
3885 mbuf_t m,
3886 __unused char *frame_header)
39236c6e 3887{
fe8ab488 3888 mbuf_pkthdr_setrcvif(m, interface);
5c9f4661
A
3889
3890#if IPSEC_NEXUS
3891 struct ipsec_pcb *pcb = ifnet_softc(interface);
3892 if (!pcb->ipsec_use_netif)
3893#endif // IPSEC_NEXUS
3894 {
3895 uint32_t af = 0;
3896 struct ip *ip = mtod(m, struct ip *);
3897 if (ip->ip_v == 4) {
3898 af = AF_INET;
3899 } else if (ip->ip_v == 6) {
3900 af = AF_INET6;
3901 }
3902 bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
a39ff7e2 3903 pktap_input(interface, protocol, m, NULL);
5c9f4661 3904 }
39037602 3905
0a7de745 3906 int32_t pktlen = m->m_pkthdr.len;
3e170ce0
A
3907 if (proto_input(protocol, m) != 0) {
3908 ifnet_stat_increment_in(interface, 0, 0, 1);
39236c6e 3909 m_freem(m);
3e170ce0 3910 } else {
a39ff7e2 3911 ifnet_stat_increment_in(interface, 1, pktlen, 0);
3e170ce0 3912 }
0a7de745 3913
39236c6e
A
3914 return 0;
3915}
3916
3917static errno_t
0a7de745
A
3918ipsec_proto_pre_output(__unused ifnet_t interface,
3919 protocol_family_t protocol,
3920 __unused mbuf_t *packet,
3921 __unused const struct sockaddr *dest,
3922 __unused void *route,
3923 char *frame_type,
3924 __unused char *link_layer_dest)
39236c6e 3925{
39236c6e
A
3926 *(protocol_family_t *)(void *)frame_type = protocol;
3927 return 0;
3928}
3929
3930static errno_t
0a7de745
A
3931ipsec_attach_proto(ifnet_t interface,
3932 protocol_family_t protocol)
39236c6e 3933{
0a7de745
A
3934 struct ifnet_attach_proto_param proto;
3935 errno_t result;
3936
39236c6e
A
3937 bzero(&proto, sizeof(proto));
3938 proto.input = ipsec_proto_input;
3939 proto.pre_output = ipsec_proto_pre_output;
0a7de745 3940
39236c6e
A
3941 result = ifnet_attach_protocol(interface, protocol, &proto);
3942 if (result != 0 && result != EEXIST) {
cb323159 3943 os_log_error(OS_LOG_DEFAULT, "ipsec_attach_proto - ifnet_attach_protocol %d failed: %d\n",
0a7de745 3944 protocol, result);
39236c6e 3945 }
0a7de745 3946
39236c6e
A
3947 return result;
3948}
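/*
 * Usage sketch (assumption, for illustration only): both address families
 * are expected to be attached this way once the ifnet exists, mirroring the
 * PF_INET / PF_INET6 cases accepted by ipsec_add_proto() above.
 *
 *	errno_t err = ipsec_attach_proto(interface, PF_INET);
 *	if (err == 0) {
 *		err = ipsec_attach_proto(interface, PF_INET6);
 *	}
 */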
fe8ab488 3949
5ba3f43e 3950errno_t
0a7de745
A
3951ipsec_inject_inbound_packet(ifnet_t interface,
3952 mbuf_t packet)
5ba3f43e 3953{
a39ff7e2 3954#if IPSEC_NEXUS
5ba3f43e
A
3955 struct ipsec_pcb *pcb = ifnet_softc(interface);
3956
5c9f4661 3957 if (pcb->ipsec_use_netif) {
cb323159
A
3958 if (!ipsec_data_move_begin(pcb)) {
3959 os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__,
3960 if_name(pcb->ipsec_ifp));
3961 return ENXIO;
3962 }
3963
5c9f4661 3964 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
5ba3f43e 3965
5c9f4661 3966 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
cb323159
A
3967
3968 if (pcb->ipsec_input_chain_count > (u_int32_t)if_ipsec_max_pending_input) {
3969 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
3970 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3971 ipsec_data_move_end(pcb);
3972 return ENOSPC;
3973 }
3974
5c9f4661
A
3975 if (pcb->ipsec_input_chain != NULL) {
3976 pcb->ipsec_input_chain_last->m_nextpkt = packet;
3977 } else {
3978 pcb->ipsec_input_chain = packet;
3979 }
cb323159 3980 pcb->ipsec_input_chain_count++;
5c9f4661
A
3981 while (packet->m_nextpkt) {
3982 VERIFY(packet != packet->m_nextpkt);
3983 packet = packet->m_nextpkt;
cb323159 3984 pcb->ipsec_input_chain_count++;
5c9f4661
A
3985 }
3986 pcb->ipsec_input_chain_last = packet;
3987 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
5ba3f43e 3988
cb323159 3989 kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
5c9f4661 3990 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
5ba3f43e 3991
5c9f4661
A
3992 if (rx_ring != NULL) {
3993 kern_channel_notify(rx_ring, 0);
3994 }
5ba3f43e 3995
cb323159 3996 ipsec_data_move_end(pcb);
0a7de745 3997 return 0;
5c9f4661
A
3998 } else
3999#endif // IPSEC_NEXUS
4000 {
4001 errno_t error;
4002 protocol_family_t protocol;
4003 if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
4004 return error;
4005 }
5ba3f43e 4006
5c9f4661
A
4007 return ipsec_proto_input(interface, protocol, packet, NULL);
4008 }
fe8ab488
A
4009}
4010
4011void
4012ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family)
4013{
4014 if (packet != NULL && interface != NULL) {
4015 struct ipsec_pcb *pcb = ifnet_softc(interface);
4016 if (pcb != NULL) {
4017 /* Set traffic class, set flow */
4018 m_set_service_class(packet, pcb->ipsec_output_service_class);
4019 packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
4020 packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
4021 if (family == AF_INET) {
4022 struct ip *ip = mtod(packet, struct ip *);
4023 packet->m_pkthdr.pkt_proto = ip->ip_p;
3e170ce0 4024 } else if (family == AF_INET6) {
fe8ab488
A
4025 struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
4026 packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
4027 }
4028 packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
4029 }
4030 }
4031}
39037602
A
4032
4033void
4034ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
4035{
4036 struct ipsec_pcb *pcb;
0a7de745
A
4037
4038 if (interface == NULL || ipoa == NULL) {
39037602 4039 return;
0a7de745 4040 }
39037602 4041 pcb = ifnet_softc(interface);
0a7de745 4042
39037602
A
4043 if (net_qos_policy_restricted == 0) {
4044 ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
4045 ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4046 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
0a7de745 4047 net_qos_policy_restrict_avapps != 0) {
39037602
A
4048 ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4049 } else {
4050 ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
4051 ipoa->ipoa_sotc = SO_TC_VO;
4052 }
4053}
4054
4055void
4056ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
4057{
4058 struct ipsec_pcb *pcb;
0a7de745
A
4059
4060 if (interface == NULL || ip6oa == NULL) {
39037602 4061 return;
0a7de745 4062 }
39037602 4063 pcb = ifnet_softc(interface);
0a7de745 4064
39037602
A
4065 if (net_qos_policy_restricted == 0) {
4066 ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4067 ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4068 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
0a7de745 4069 net_qos_policy_restrict_avapps != 0) {
39037602
A
4070 ip6oa->ip6oa_flags &= ~IP6OAF_QOSMARKING_ALLOWED;
4071 } else {
4072 ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4073 ip6oa->ip6oa_sotc = SO_TC_VO;
4074 }
4075}
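/*
 * Summary of the QoS-marking decision above (same logic for both address
 * families):
 *   - policy unrestricted                          -> marking allowed, traffic
 *                                                     class taken from the
 *                                                     tunnel's output service
 *                                                     class
 *   - restricted and (service class not MBUF_SC_VO,
 *     or AV apps also restricted)                  -> marking disallowed
 *   - restricted, MBUF_SC_VO, AV apps permitted    -> marking allowed, forced
 *                                                     to SO_TC_VO
 */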
cb323159
A
4076
4077static boolean_t
4078ipsec_data_move_begin(struct ipsec_pcb *pcb)
4079{
4080 boolean_t ret = 0;
4081
4082 lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
4083 if ((ret = IPSEC_IS_DATA_PATH_READY(pcb))) {
4084 pcb->ipsec_pcb_data_move++;
4085 }
4086 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4087
4088 return ret;
4089}
4090
4091static void
4092ipsec_data_move_end(struct ipsec_pcb *pcb)
4093{
4094 lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
4095 VERIFY(pcb->ipsec_pcb_data_move > 0);
4096 /*
4097 * if there are no more threads moving data, wake up any
4098 * drainers blocked waiting for this.
4099 */
4100 if (--pcb->ipsec_pcb_data_move == 0 && pcb->ipsec_pcb_drainers > 0) {
4101 wakeup(&(pcb->ipsec_pcb_data_move));
4102 }
4103 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4104}
4105
4106static void
4107ipsec_data_move_drain(struct ipsec_pcb *pcb)
4108{
4109 lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
4110 /* data path must already be marked as not ready */
4111 VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
4112 pcb->ipsec_pcb_drainers++;
4113 while (pcb->ipsec_pcb_data_move != 0) {
4114 (void)msleep(&(pcb->ipsec_pcb_data_move), &pcb->ipsec_pcb_data_move_lock,
4115 (PZERO - 1), __func__, NULL);
4116 }
4117 VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
4118 VERIFY(pcb->ipsec_pcb_drainers > 0);
4119 pcb->ipsec_pcb_drainers--;
4120 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4121}
4122
4123static void
4124ipsec_wait_data_move_drain(struct ipsec_pcb *pcb)
4125{
4126 /*
4127 * Mark the data path as not usable.
4128 */
4129 lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
4130 IPSEC_CLR_DATA_PATH_READY(pcb);
4131 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4132
4133 /* Wait until all threads in the data paths are done. */
4134 ipsec_data_move_drain(pcb);
4135}
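/*
 * Data-path bracket pattern (sketch, mirroring ipsec_inject_inbound_packet()
 * above): every data-path entry point takes the begin/end pair so that
 * ipsec_wait_data_move_drain() can mark the path not-ready and block until
 * all in-flight threads have left.
 *
 *	if (!ipsec_data_move_begin(pcb)) {
 *		return ENXIO;		// data path already stopped
 *	}
 *	// ... touch rings / move packets ...
 *	ipsec_data_move_end(pcb);
 */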