]> git.saurik.com Git - apple/xnu.git/blame - bsd/net/if_ipsec.c
xnu-6153.141.1.tar.gz
[apple/xnu.git] / bsd / net / if_ipsec.c
CommitLineData
39236c6e 1/*
cb323159 2 * Copyright (c) 2012-2019 Apple Inc. All rights reserved.
39236c6e
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
d9a64523 29
39236c6e
A
30#include <sys/systm.h>
31#include <sys/kern_control.h>
32#include <net/kpi_protocol.h>
33#include <net/kpi_interface.h>
34#include <sys/socket.h>
fe8ab488 35#include <sys/socketvar.h>
39236c6e
A
36#include <net/if.h>
37#include <net/if_types.h>
38#include <net/bpf.h>
39#include <net/if_ipsec.h>
39236c6e
A
40#include <sys/mbuf.h>
41#include <sys/sockio.h>
42#include <netinet/in.h>
43#include <netinet/ip6.h>
44#include <netinet6/in6_var.h>
45#include <netinet6/ip6_var.h>
46#include <sys/kauth.h>
47#include <netinet6/ipsec.h>
48#include <netinet6/ipsec6.h>
5ba3f43e
A
49#include <netinet6/esp.h>
50#include <netinet6/esp6.h>
39236c6e
A
51#include <netinet/ip.h>
52#include <net/flowadv.h>
fe8ab488 53#include <net/necp.h>
39037602
A
54#include <netkey/key.h>
55#include <net/pktap.h>
5ba3f43e 56#include <kern/zalloc.h>
cb323159 57#include <os/log.h>
5ba3f43e
A
58
59#define IPSEC_NEXUS 0
39037602
A
60
61extern int net_qos_policy_restricted;
62extern int net_qos_policy_restrict_avapps;
39236c6e
A
63
64/* Kernel Control functions */
0a7de745
A
65static errno_t ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
66 void **unitinfo);
67static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
68 void **unitinfo);
69static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
70 void *unitinfo);
71static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
72 void *unitinfo, mbuf_t m, int flags);
73static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
74 int opt, void *data, size_t *len);
75static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
76 int opt, void *data, size_t len);
39236c6e
A
77
78/* Network Interface functions */
0a7de745
A
79static void ipsec_start(ifnet_t interface);
80static errno_t ipsec_output(ifnet_t interface, mbuf_t data);
81static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
82 protocol_family_t *protocol);
83static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
84 const struct ifnet_demux_desc *demux_array,
85 u_int32_t demux_count);
86static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
87static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
88static void ipsec_detached(ifnet_t interface);
39236c6e
A
89
90/* Protocol handlers */
0a7de745
A
91static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
92static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
93 mbuf_t m, char *frame_header);
39236c6e 94static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
0a7de745
A
95 mbuf_t *packet, const struct sockaddr *dest, void *route,
96 char *frame_type, char *link_layer_dest);
39236c6e 97
0a7de745 98static kern_ctl_ref ipsec_kctlref;
5ba3f43e
A
99static lck_attr_t *ipsec_lck_attr;
100static lck_grp_attr_t *ipsec_lck_grp_attr;
101static lck_grp_t *ipsec_lck_grp;
102static lck_mtx_t ipsec_lock;
103
104#if IPSEC_NEXUS
105
106SYSCTL_DECL(_net_ipsec);
107SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
108static int if_ipsec_verify_interface_creation = 0;
109SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");
110
0a7de745 111#define IPSEC_IF_VERIFY(_e) if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }
5ba3f43e 112
5c9f4661 113#define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
5ba3f43e
A
114#define IPSEC_IF_DEFAULT_RING_SIZE 64
115#define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
116#define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
0a7de745 117#define IPSEC_IF_DEFAULT_BUF_SEG_SIZE skmem_usr_buf_seg_size
5ba3f43e 118
cb323159
A
119#define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
120#define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
121#define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
122#define IPSEC_NETIF_WMM_RX_RING_COUNT 1
123#define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
124#define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT
125
126#define IPSEC_IF_MIN_RING_SIZE 8
5ba3f43e
A
127#define IPSEC_IF_MAX_RING_SIZE 1024
128
5c9f4661
A
129#define IPSEC_IF_MIN_SLOT_SIZE 1024
130#define IPSEC_IF_MAX_SLOT_SIZE 4096
131
cb323159
A
132#define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512
133
134static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT;
135
5ba3f43e
A
136static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
137static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
138static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
139
140static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
141static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
142static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;
143
cb323159 144SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "");
5ba3f43e 145SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
0a7de745 146 &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
5ba3f43e 147SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
0a7de745 148 &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
5ba3f43e 149SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
0a7de745 150 &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");
5ba3f43e 151
cb323159
A
152static int if_ipsec_debug = 0;
153SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "");
154
5ba3f43e
A
155static errno_t
156ipsec_register_nexus(void);
157
// Set of nexus-related UUIDs kept per interface (embedded in struct ipsec_pcb
// as the ipsec_nx member). Note that ipsec_nx_t is a POINTER to this struct.
// NOTE(review): going by the field names, if_* refer to the netif provider /
// instance and fsw_* to the flowswitch objects - confirm against the code
// that fills these in, which is not visible here.
158typedef struct ipsec_nx {
159 uuid_t if_provider;
160 uuid_t if_instance;
cb323159
A
161 uuid_t fsw_provider;
162 uuid_t fsw_instance;
163 uuid_t fsw_device;
164 uuid_t fsw_host;
165 uuid_t fsw_agent;
5ba3f43e
A
166} *ipsec_nx_t;
167
168static nexus_controller_t ipsec_ncd;
169static int ipsec_ncd_refcount;
170static uuid_t ipsec_kpipe_uuid;
171
172#endif // IPSEC_NEXUS
173
174/* Control block allocated for each kernel control connection */
// Per-connection control block. Elements are sized for ipsec_pcb_zone from
// sizeof(struct ipsec_pcb) in ipsec_register_control().
175struct ipsec_pcb {
0a7de745
A
// List linkage; presumably for the global ipsec_head list - confirm at the insertion site
176 TAILQ_ENTRY(ipsec_pcb) ipsec_chain;
177 kern_ctl_ref ipsec_ctlref;
cb323159
A
178 ifnet_t ipsec_ifp;
// ipsec_interface_isvalid() treats ipsec_unit == 0 as "control disconnected"
179 u_int32_t ipsec_unit;
180 u_int32_t ipsec_unique_id;
181 // These external flags can be set with IPSEC_OPT_FLAGS
182 u_int32_t ipsec_external_flags;
183 // These internal flags are only used within this driver
// (accessed through ipsec_flag_set/ipsec_flag_clr/ipsec_flag_isset, e.g. IPSEC_FLAGS_KPIPE_ALLOCATED)
184 u_int32_t ipsec_internal_flags;
185 u_int32_t ipsec_input_frag_size;
186 bool ipsec_frag_size_set;
187 int ipsec_ext_ifdata_stats;
0a7de745 188 mbuf_svc_class_t ipsec_output_service_class;
cb323159
A
189 char ipsec_if_xname[IFXNAMSIZ];
190 char ipsec_unique_name[IFXNAMSIZ];
191 // PCB lock protects state fields, like ipsec_kpipe_count
5ba3f43e 192 decl_lck_rw_data(, ipsec_pcb_lock);
cb323159
A
193 // lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
// (ipsec_nexus_connected() also holds it while setting IPSEC_PCB_DATA_PATH_READY)
194 decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
195 u_int32_t ipsec_pcb_data_move; /* number of data moving contexts */
196 u_int32_t ipsec_pcb_drainers; /* number of threads waiting to drain */
// Bit field; IPSEC_PCB_DATA_PATH_READY is manipulated via the IPSEC_*_DATA_PATH_READY macros
197 u_int32_t ipsec_pcb_data_path_state; /* internal state of interface data path */
5ba3f43e
A
198
199#if IPSEC_NEXUS
cb323159
A
// See the note further down (above ipsec_nx) for what this lock protects and its ordering rule
200 lck_mtx_t ipsec_input_chain_lock;
// Held around ipsec_encrypt_mbuf() in ipsec_kpipe_sync_rx()
201 lck_mtx_t ipsec_kpipe_encrypt_lock;
202 lck_mtx_t ipsec_kpipe_decrypt_lock;
0a7de745
A
203 struct mbuf * ipsec_input_chain;
204 struct mbuf * ipsec_input_chain_last;
cb323159 205 u_int32_t ipsec_input_chain_count;
5ba3f43e
A
206 // Input chain lock protects the list of input mbufs
207 // The input chain lock must be taken AFTER the PCB lock if both are held
0a7de745 208 struct ipsec_nx ipsec_nx;
cb323159
A
// ipsec_in_wmm_mode() is true when this equals IPSEC_IF_WMM_RING_COUNT
209 u_int32_t ipsec_kpipe_count;
210 pid_t ipsec_kpipe_pid;
// The three arrays below are indexed by kpipe ring index; the ring pointers are
// recorded by ipsec_kpipe_ring_init() and reset to NULL by ipsec_kpipe_ring_fini()
211 uuid_t ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];
212 void * ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];
213 void * ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];
214 kern_pbufpool_t ipsec_kpipe_pp;
215 u_int32_t ipsec_kpipe_tx_ring_size;
216 u_int32_t ipsec_kpipe_rx_ring_size;
0a7de745
A
217
// Set by ipsec_netif_prepare(), reset to NULL by ipsec_nexus_disconnected()
218 kern_nexus_t ipsec_netif_nexus;
cb323159
A
219 kern_pbufpool_t ipsec_netif_pp;
220 void * ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
221 void * ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
222 uint64_t ipsec_netif_txring_size;
223
// ipsec_kpipe_sync_rx() caps the number of bytes it takes from a TX packet at this value
224 u_int32_t ipsec_slot_size;
225 u_int32_t ipsec_netif_ring_size;
226 u_int32_t ipsec_tx_fsw_ring_size;
227 u_int32_t ipsec_rx_fsw_ring_size;
228 bool ipsec_use_netif;
// Value reported by ipsec_interface_needs_netagent()
229 bool ipsec_needs_netagent;
5ba3f43e
A
230#endif // IPSEC_NEXUS
231};
232
cb323159
A
233/* These are internal flags not exposed outside this file */
234#define IPSEC_FLAGS_KPIPE_ALLOCATED 1
235
236/* data movement refcounting functions */
cb323159
A
237static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);
238
239/* Data path states */
240#define IPSEC_PCB_DATA_PATH_READY 0x1
241
242/* Macros to set/clear/test data path states */
243#define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
244#define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
245#define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)
246
247#if IPSEC_NEXUS
248/* Inline helpers to set/clear/test the driver-internal flags. */
249static inline void
250ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag)
251{
252 pcb->ipsec_internal_flags |= flag;
253}
254static inline void
255ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag)
256{
257 pcb->ipsec_internal_flags &= ~flag;
258}
259
260static inline bool
261ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag)
262{
263 return !!(pcb->ipsec_internal_flags & flag);
264}
265#endif // IPSEC_NEXUS
266
5ba3f43e
A
267TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;
268
0a7de745
A
269#define IPSEC_PCB_ZONE_MAX 32
270#define IPSEC_PCB_ZONE_NAME "net.if_ipsec"
5ba3f43e 271
0a7de745
A
272static unsigned int ipsec_pcb_size; /* size of zone element */
273static struct zone *ipsec_pcb_zone; /* zone for ipsec_pcb */
5ba3f43e
A
274
275#define IPSECQ_MAXLEN 256
276
277#if IPSEC_NEXUS
278static int
279sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
280{
281#pragma unused(arg1, arg2)
282 int value = if_ipsec_ring_size;
283
284 int error = sysctl_handle_int(oidp, &value, 0, req);
285 if (error || !req->newptr) {
0a7de745 286 return error;
5ba3f43e
A
287 }
288
289 if (value < IPSEC_IF_MIN_RING_SIZE ||
0a7de745
A
290 value > IPSEC_IF_MAX_RING_SIZE) {
291 return EINVAL;
5ba3f43e
A
292 }
293
294 if_ipsec_ring_size = value;
295
0a7de745 296 return 0;
5ba3f43e
A
297}
298
299static int
300sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
301{
302#pragma unused(arg1, arg2)
303 int value = if_ipsec_tx_fsw_ring_size;
304
305 int error = sysctl_handle_int(oidp, &value, 0, req);
306 if (error || !req->newptr) {
0a7de745 307 return error;
5ba3f43e
A
308 }
309
310 if (value < IPSEC_IF_MIN_RING_SIZE ||
0a7de745
A
311 value > IPSEC_IF_MAX_RING_SIZE) {
312 return EINVAL;
5ba3f43e
A
313 }
314
315 if_ipsec_tx_fsw_ring_size = value;
316
0a7de745 317 return 0;
5ba3f43e
A
318}
319
320static int
321sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
322{
323#pragma unused(arg1, arg2)
324 int value = if_ipsec_rx_fsw_ring_size;
325
326 int error = sysctl_handle_int(oidp, &value, 0, req);
327 if (error || !req->newptr) {
0a7de745 328 return error;
5ba3f43e
A
329 }
330
331 if (value < IPSEC_IF_MIN_RING_SIZE ||
0a7de745
A
332 value > IPSEC_IF_MAX_RING_SIZE) {
333 return EINVAL;
5ba3f43e
A
334 }
335
336 if_ipsec_rx_fsw_ring_size = value;
337
0a7de745 338 return 0;
5ba3f43e 339}
cb323159
A
340
341
342static inline bool
343ipsec_in_wmm_mode(struct ipsec_pcb *pcb)
344{
345 return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT;
346}
347
5ba3f43e
A
348#endif // IPSEC_NEXUS
349
350errno_t
351ipsec_register_control(void)
352{
0a7de745
A
353 struct kern_ctl_reg kern_ctl;
354 errno_t result = 0;
355
5ba3f43e
A
356 ipsec_pcb_size = sizeof(struct ipsec_pcb);
357 ipsec_pcb_zone = zinit(ipsec_pcb_size,
0a7de745
A
358 IPSEC_PCB_ZONE_MAX * ipsec_pcb_size,
359 0, IPSEC_PCB_ZONE_NAME);
5ba3f43e 360 if (ipsec_pcb_zone == NULL) {
cb323159 361 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - zinit(ipsec_pcb) failed");
5ba3f43e
A
362 return ENOMEM;
363 }
364
365#if IPSEC_NEXUS
366 ipsec_register_nexus();
367#endif // IPSEC_NEXUS
368
369 TAILQ_INIT(&ipsec_head);
0a7de745 370
5ba3f43e
A
371 bzero(&kern_ctl, sizeof(kern_ctl));
372 strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
373 kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
374 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; /* Require root */
375 kern_ctl.ctl_sendsize = 64 * 1024;
376 kern_ctl.ctl_recvsize = 64 * 1024;
5c9f4661 377 kern_ctl.ctl_bind = ipsec_ctl_bind;
5ba3f43e
A
378 kern_ctl.ctl_connect = ipsec_ctl_connect;
379 kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
380 kern_ctl.ctl_send = ipsec_ctl_send;
381 kern_ctl.ctl_setopt = ipsec_ctl_setopt;
382 kern_ctl.ctl_getopt = ipsec_ctl_getopt;
0a7de745 383
5ba3f43e
A
384 result = ctl_register(&kern_ctl, &ipsec_kctlref);
385 if (result != 0) {
cb323159 386 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n", result);
5ba3f43e
A
387 return result;
388 }
0a7de745 389
5ba3f43e 390 /* Register the protocol plumbers */
cb323159 391 if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC,
0a7de745 392 ipsec_attach_proto, NULL)) != 0) {
cb323159
A
393 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
394 result);
5ba3f43e
A
395 ctl_deregister(ipsec_kctlref);
396 return result;
397 }
0a7de745 398
5ba3f43e 399 /* Register the protocol plumbers */
cb323159 400 if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC,
0a7de745 401 ipsec_attach_proto, NULL)) != 0) {
cb323159 402 proto_unregister_plumber(PF_INET, IFNET_FAMILY_IPSEC);
5ba3f43e 403 ctl_deregister(ipsec_kctlref);
cb323159
A
404 os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
405 result);
5ba3f43e
A
406 return result;
407 }
408
409 ipsec_lck_attr = lck_attr_alloc_init();
410 ipsec_lck_grp_attr = lck_grp_attr_alloc_init();
411 ipsec_lck_grp = lck_grp_alloc_init("ipsec", ipsec_lck_grp_attr);
412 lck_mtx_init(&ipsec_lock, ipsec_lck_grp, ipsec_lck_attr);
0a7de745 413
5ba3f43e
A
414 return 0;
415}
416
417/* Helpers */
418int
0a7de745 419ipsec_interface_isvalid(ifnet_t interface)
5ba3f43e 420{
0a7de745
A
421 struct ipsec_pcb *pcb = NULL;
422
423 if (interface == NULL) {
424 return 0;
425 }
426
427 pcb = ifnet_softc(interface);
428
429 if (pcb == NULL) {
430 return 0;
431 }
432
433 /* When ctl disconnects, ipsec_unit is set to 0 */
434 if (pcb->ipsec_unit == 0) {
435 return 0;
436 }
437
438 return 1;
5ba3f43e
A
439}
440
d9a64523 441#if IPSEC_NEXUS
9d749ea3
A
442boolean_t
443ipsec_interface_needs_netagent(ifnet_t interface)
444{
445 struct ipsec_pcb *pcb = NULL;
446
447 if (interface == NULL) {
0a7de745 448 return FALSE;
9d749ea3
A
449 }
450
451 pcb = ifnet_softc(interface);
452
453 if (pcb == NULL) {
0a7de745 454 return FALSE;
9d749ea3
A
455 }
456
0a7de745 457 return pcb->ipsec_needs_netagent == true;
9d749ea3 458}
d9a64523 459#endif // IPSEC_NEXUS
9d749ea3 460
5ba3f43e
A
461static errno_t
462ipsec_ifnet_set_attrs(ifnet_t ifp)
463{
464 /* Set flags and additional information. */
465 ifnet_set_mtu(ifp, 1500);
466 ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
467
468 /* The interface must generate its own IPv6 LinkLocal address,
469 * if possible following the recommendation of RFC2472 to the 64bit interface ID
470 */
471 ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
472
473#if !IPSEC_NEXUS
474 /* Reset the stats in case as the interface may have been recycled */
475 struct ifnet_stats_param stats;
476 bzero(&stats, sizeof(struct ifnet_stats_param));
477 ifnet_set_stat(ifp, &stats);
478#endif // !IPSEC_NEXUS
479
0a7de745 480 return 0;
5ba3f43e
A
481}
482
483#if IPSEC_NEXUS
484
485static uuid_t ipsec_nx_dom_prov;
486
487static errno_t
488ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
489{
490 return 0;
491}
492
493static void
494ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
495{
496 // Ignore
497}
498
499static errno_t
500ipsec_register_nexus(void)
501{
502 const struct kern_nexus_domain_provider_init dp_init = {
503 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
504 .nxdpi_flags = 0,
505 .nxdpi_init = ipsec_nxdp_init,
506 .nxdpi_fini = ipsec_nxdp_fini
507 };
508 errno_t err = 0;
509
510 /* ipsec_nxdp_init() is called before this function returns */
511 err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
0a7de745
A
512 (const uint8_t *) "com.apple.ipsec",
513 &dp_init, sizeof(dp_init),
514 &ipsec_nx_dom_prov);
5ba3f43e 515 if (err != 0) {
cb323159 516 os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
0a7de745 517 return err;
5ba3f43e 518 }
0a7de745 519 return 0;
5ba3f43e
A
520}
521
522static errno_t
523ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
524{
525 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
526 pcb->ipsec_netif_nexus = nexus;
0a7de745 527 return ipsec_ifnet_set_attrs(ifp);
5ba3f43e
A
528}
529
530static errno_t
531ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
0a7de745
A
532 proc_t p, kern_nexus_t nexus,
533 nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
5ba3f43e
A
534{
535#pragma unused(nxprov, p)
536#pragma unused(nexus, nexus_port, channel, ch_ctx)
0a7de745 537 return 0;
5ba3f43e
A
538}
539
540static errno_t
541ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
0a7de745 542 kern_channel_t channel)
5ba3f43e
A
543{
544#pragma unused(nxprov, channel)
545 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
546 boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
cb323159
A
547 /* Mark the data path as ready */
548 if (ok) {
549 lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
550 IPSEC_SET_DATA_PATH_READY(pcb);
551 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
552 }
0a7de745 553 return ok ? 0 : ENXIO;
5ba3f43e
A
554}
555
556static void
557ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
0a7de745 558 kern_channel_t channel)
5ba3f43e 559{
cb323159
A
560#pragma unused(nxprov, channel)
561 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
562
563 VERIFY(pcb->ipsec_kpipe_count != 0);
564
565 /* Wait until all threads in the data paths are done. */
566 ipsec_wait_data_move_drain(pcb);
5ba3f43e
A
567}
568
569static void
570ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
0a7de745 571 kern_channel_t channel)
5ba3f43e 572{
cb323159
A
573#pragma unused(nxprov, channel)
574 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
575
576 /* Wait until all threads in the data paths are done. */
577 ipsec_wait_data_move_drain(pcb);
5ba3f43e
A
578}
579
580static void
581ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
0a7de745 582 kern_channel_t channel)
5ba3f43e
A
583{
584#pragma unused(nxprov, channel)
585 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
586 if (pcb->ipsec_netif_nexus == nexus) {
587 pcb->ipsec_netif_nexus = NULL;
588 }
589 ifnet_decr_iorefcnt(pcb->ipsec_ifp);
590}
591
592static errno_t
593ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
0a7de745
A
594 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
595 void **ring_ctx)
5ba3f43e
A
596{
597#pragma unused(nxprov)
598#pragma unused(channel)
5ba3f43e 599 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
cb323159
A
600 uint8_t ring_idx;
601
602 for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
603 if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->ipsec_kpipe_uuid[ring_idx])) {
604 break;
605 }
606 }
607
608 if (ring_idx == pcb->ipsec_kpipe_count) {
609 uuid_string_t uuidstr;
610 uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
611 os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->ipsec_if_xname, uuidstr);
612 return ENOENT;
613 }
614
615 *ring_ctx = (void *)(uintptr_t)ring_idx;
616
5ba3f43e 617 if (!is_tx_ring) {
cb323159
A
618 VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL);
619 pcb->ipsec_kpipe_rxring[ring_idx] = ring;
5ba3f43e 620 } else {
cb323159
A
621 VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL);
622 pcb->ipsec_kpipe_txring[ring_idx] = ring;
5ba3f43e
A
623 }
624 return 0;
625}
626
627static void
628ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
0a7de745 629 kern_channel_ring_t ring)
5ba3f43e
A
630{
631#pragma unused(nxprov)
cb323159 632 bool found = false;
5ba3f43e 633 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
cb323159
A
634
635 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
636 if (pcb->ipsec_kpipe_rxring[i] == ring) {
637 pcb->ipsec_kpipe_rxring[i] = NULL;
638 found = true;
639 } else if (pcb->ipsec_kpipe_txring[i] == ring) {
640 pcb->ipsec_kpipe_txring[i] = NULL;
641 found = true;
642 }
5ba3f43e 643 }
cb323159 644 VERIFY(found);
5ba3f43e
A
645}
646
647static errno_t
648ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
0a7de745 649 kern_channel_ring_t tx_ring, uint32_t flags)
5ba3f43e
A
650{
651#pragma unused(nxprov)
652#pragma unused(flags)
653 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
654
cb323159
A
655 if (!ipsec_data_move_begin(pcb)) {
656 os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
657 return 0;
658 }
659
5ba3f43e 660 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
cb323159
A
661
662 if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
5ba3f43e 663 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
cb323159 664 ipsec_data_move_end(pcb);
5ba3f43e
A
665 return 0;
666 }
667
cb323159
A
668 VERIFY(pcb->ipsec_kpipe_count);
669
5ba3f43e
A
670 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
671 if (tx_slot == NULL) {
672 // Nothing to write, bail
673 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
cb323159 674 ipsec_data_move_end(pcb);
5ba3f43e
A
675 return 0;
676 }
677
678 // Signal the netif ring to read
cb323159 679 kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
5ba3f43e
A
680 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
681
682 if (rx_ring != NULL) {
683 kern_channel_notify(rx_ring, 0);
684 }
cb323159
A
685
686 ipsec_data_move_end(pcb);
5ba3f43e
A
687 return 0;
688}
689
690static mbuf_t
691ipsec_encrypt_mbuf(ifnet_t interface,
0a7de745 692 mbuf_t data)
5ba3f43e
A
693{
694 struct ipsec_output_state ipsec_state;
695 int error = 0;
696 uint32_t af;
697
698 // Make sure this packet isn't looping through the interface
699 if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
700 error = -1;
701 goto ipsec_output_err;
702 }
703
704 // Mark the interface so NECP can evaluate tunnel policy
705 necp_mark_packet_from_interface(data, interface);
706
707 struct ip *ip = mtod(data, struct ip *);
708 u_int ip_version = ip->ip_v;
709
710 switch (ip_version) {
0a7de745
A
711 case 4: {
712 af = AF_INET;
713
714 memset(&ipsec_state, 0, sizeof(ipsec_state));
715 ipsec_state.m = data;
716 ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
717 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
718
719 error = ipsec4_interface_output(&ipsec_state, interface);
720 if (error == 0 && ipsec_state.tunneled == 6) {
721 // Tunneled in IPv6 - packet is gone
722 // TODO: Don't lose mbuf
723 data = NULL;
5ba3f43e
A
724 goto done;
725 }
5ba3f43e 726
0a7de745
A
727 data = ipsec_state.m;
728 if (error || data == NULL) {
729 if (error) {
cb323159 730 os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec4_output error %d\n", error);
5ba3f43e 731 }
0a7de745
A
732 goto ipsec_output_err;
733 }
734 goto done;
735 }
736 case 6: {
737 af = AF_INET6;
5ba3f43e 738
0a7de745
A
739 data = ipsec6_splithdr(data);
740 if (data == NULL) {
cb323159 741 os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
0a7de745
A
742 goto ipsec_output_err;
743 }
5ba3f43e 744
0a7de745 745 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
5ba3f43e 746
0a7de745
A
747 memset(&ipsec_state, 0, sizeof(ipsec_state));
748 ipsec_state.m = data;
749 ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
750 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
751
752 error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
753 if (error == 0 && ipsec_state.tunneled == 4) {
754 // Tunneled in IPv4 - packet is gone
755 // TODO: Don't lose mbuf
756 data = NULL;
5ba3f43e
A
757 goto done;
758 }
0a7de745
A
759 data = ipsec_state.m;
760 if (error || data == NULL) {
761 if (error) {
cb323159 762 os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_output error %d\n", error);
0a7de745 763 }
5ba3f43e
A
764 goto ipsec_output_err;
765 }
0a7de745
A
766 goto done;
767 }
768 default: {
cb323159 769 os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
0a7de745
A
770 error = -1;
771 goto ipsec_output_err;
772 }
5ba3f43e
A
773 }
774
775done:
776 return data;
777
778ipsec_output_err:
779 if (data) {
780 mbuf_freem(data);
781 }
782 return NULL;
783}
784
785static errno_t
786ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
0a7de745 787 kern_channel_ring_t rx_ring, uint32_t flags)
5ba3f43e
A
788{
789#pragma unused(nxprov)
790#pragma unused(flags)
791 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
792 struct kern_channel_ring_stat_increment rx_ring_stats;
cb323159
A
793 uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);
794
795 if (!ipsec_data_move_begin(pcb)) {
796 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
797 return 0;
798 }
5ba3f43e
A
799
800 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
801
cb323159 802 if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
5ba3f43e 803 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
cb323159 804 ipsec_data_move_end(pcb);
5ba3f43e
A
805 return 0;
806 }
807
cb323159
A
808 VERIFY(pcb->ipsec_kpipe_count);
809 VERIFY(ring_idx <= pcb->ipsec_kpipe_count);
810
5ba3f43e
A
811 // Reclaim user-released slots
812 (void) kern_channel_reclaim(rx_ring);
813
814 uint32_t avail = kern_channel_available_slot_count(rx_ring);
815 if (avail == 0) {
816 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
cb323159
A
817 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
818 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
819 ipsec_data_move_end(pcb);
5ba3f43e
A
820 return 0;
821 }
822
cb323159 823 kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
5ba3f43e
A
824 if (tx_ring == NULL) {
825 // Net-If TX ring not set up yet, nothing to read
826 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
cb323159
A
827 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
828 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
829 ipsec_data_move_end(pcb);
5ba3f43e
A
830 return 0;
831 }
832
833 struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;
834
835 // Unlock ipsec before entering ring
836 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
837
838 (void)kr_enter(tx_ring, TRUE);
839
840 // Lock again after entering and validate
841 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
cb323159 842 if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
5ba3f43e
A
843 // Ring no longer valid
844 // Unlock first, then exit ring
845 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
846 kr_exit(tx_ring);
cb323159
A
847 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
848 pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
849 ipsec_data_move_end(pcb);
5ba3f43e
A
850 return 0;
851 }
852
5ba3f43e
A
853 struct kern_channel_ring_stat_increment tx_ring_stats;
854 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
855 kern_channel_slot_t tx_pslot = NULL;
856 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
857 if (tx_slot == NULL) {
858 // Nothing to read, don't bother signalling
859 // Unlock first, then exit ring
860 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
861 kr_exit(tx_ring);
cb323159 862 ipsec_data_move_end(pcb);
5ba3f43e
A
863 return 0;
864 }
865
866 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
867 VERIFY(rx_pp != NULL);
868 bzero(&rx_ring_stats, sizeof(rx_ring_stats));
869 kern_channel_slot_t rx_pslot = NULL;
870 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
871
872 while (rx_slot != NULL && tx_slot != NULL) {
873 size_t length = 0;
874 mbuf_t data = NULL;
875 errno_t error = 0;
876
877 // Allocate rx packet
878 kern_packet_t rx_ph = 0;
879 error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
a39ff7e2 880 if (__improbable(error != 0)) {
cb323159 881 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
0a7de745 882 pcb->ipsec_ifp->if_xname);
5ba3f43e
A
883 break;
884 }
885
886 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
887
888 // Advance TX ring
889 tx_pslot = tx_slot;
890 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
891
892 if (tx_ph == 0) {
a39ff7e2 893 kern_pbufpool_free(rx_pp, rx_ph);
5ba3f43e
A
894 continue;
895 }
0a7de745 896
5ba3f43e
A
897 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
898 VERIFY(tx_buf != NULL);
899 uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
900 VERIFY(tx_baddr != NULL);
901 tx_baddr += kern_buflet_get_data_offset(tx_buf);
902
903 bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
904
905 length = MIN(kern_packet_get_data_length(tx_ph),
0a7de745 906 pcb->ipsec_slot_size);
5ba3f43e
A
907
908 // Increment TX stats
909 tx_ring_stats.kcrsi_slots_transferred++;
910 tx_ring_stats.kcrsi_bytes_transferred += length;
911
912 if (length > 0) {
913 error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
914 if (error == 0) {
915 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
916 if (error == 0) {
917 // Encrypt and send packet
cb323159 918 lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
5ba3f43e 919 data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
cb323159 920 lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
5ba3f43e 921 } else {
cb323159
A
922 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
923 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
924 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
925 mbuf_freem(data);
926 data = NULL;
927 }
928 } else {
cb323159
A
929 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
930 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
931 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
932 }
933 } else {
cb323159
A
934 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
935 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
936 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
937 }
938
939 if (data == NULL) {
cb323159 940 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
5ba3f43e
A
941 kern_pbufpool_free(rx_pp, rx_ph);
942 break;
943 }
944
945 length = mbuf_pkthdr_len(data);
946 if (length > rx_pp->pp_buflet_size) {
947 // Flush data
948 mbuf_freem(data);
949 kern_pbufpool_free(rx_pp, rx_ph);
cb323159 950 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
0a7de745 951 pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
5ba3f43e
A
952 continue;
953 }
954
955 // Fillout rx packet
956 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
957 VERIFY(rx_buf != NULL);
958 void *rx_baddr = kern_buflet_get_object_address(rx_buf);
959 VERIFY(rx_baddr != NULL);
960
961 // Copy-in data from mbuf to buflet
962 mbuf_copydata(data, 0, length, (void *)rx_baddr);
0a7de745 963 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
5ba3f43e
A
964
965 // Finalize and attach the packet
966 error = kern_buflet_set_data_offset(rx_buf, 0);
967 VERIFY(error == 0);
968 error = kern_buflet_set_data_length(rx_buf, length);
969 VERIFY(error == 0);
970 error = kern_packet_finalize(rx_ph);
971 VERIFY(error == 0);
972 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
973 VERIFY(error == 0);
974
cb323159
A
975 STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
976 STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
5ba3f43e
A
977
978 rx_ring_stats.kcrsi_slots_transferred++;
979 rx_ring_stats.kcrsi_bytes_transferred += length;
980
981 if (!pcb->ipsec_ext_ifdata_stats) {
982 ifnet_stat_increment_out(pcb->ipsec_ifp, 1, length, 0);
983 }
984
985 mbuf_freem(data);
986
987 rx_pslot = rx_slot;
988 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
989 }
990
991 if (rx_pslot) {
992 kern_channel_advance_slot(rx_ring, rx_pslot);
993 kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
994 }
995
996 if (tx_pslot) {
997 kern_channel_advance_slot(tx_ring, tx_pslot);
998 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
999 (void)kern_channel_reclaim(tx_ring);
1000 }
1001
5c9f4661
A
1002 /* always reenable output */
1003 errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
1004 if (error != 0) {
cb323159 1005 os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
5ba3f43e
A
1006 }
1007
1008 // Unlock first, then exit ring
1009 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1010
1011 if (tx_pslot != NULL) {
1012 kern_channel_notify(tx_ring, 0);
1013 }
1014 kr_exit(tx_ring);
1015
cb323159 1016 ipsec_data_move_end(pcb);
5ba3f43e
A
1017 return 0;
1018}
1019
cb323159
A
1020static uint8_t
1021ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1022{
1023 switch (svc_class) {
1024 case KPKT_SC_VO: {
1025 return 0;
1026 }
1027 case KPKT_SC_VI: {
1028 return 1;
1029 }
1030 case KPKT_SC_BE: {
1031 return 2;
1032 }
1033 case KPKT_SC_BK: {
1034 return 3;
1035 }
1036 default: {
1037 VERIFY(0);
1038 return 0;
1039 }
1040 }
1041}
1042
5ba3f43e
A
1043static errno_t
1044ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
0a7de745
A
1045 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
1046 void **ring_ctx)
5ba3f43e
A
1047{
1048#pragma unused(nxprov)
1049#pragma unused(channel)
5ba3f43e 1050 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
cb323159 1051
5ba3f43e 1052 if (!is_tx_ring) {
cb323159
A
1053 VERIFY(pcb->ipsec_netif_rxring[0] == NULL);
1054 pcb->ipsec_netif_rxring[0] = ring;
5ba3f43e 1055 } else {
cb323159
A
1056 uint8_t ring_idx = 0;
1057 if (ipsec_in_wmm_mode(pcb)) {
1058 int err;
1059 kern_packet_svc_class_t svc_class;
1060 err = kern_channel_get_service_class(ring, &svc_class);
1061 VERIFY(err == 0);
1062 ring_idx = ipsec_find_tx_ring_by_svc(svc_class);
1063 VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT);
1064 }
1065
1066 *ring_ctx = (void *)(uintptr_t)ring_idx;
1067
1068 VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL);
1069 pcb->ipsec_netif_txring[ring_idx] = ring;
5ba3f43e
A
1070 }
1071 return 0;
1072}
1073
1074static void
1075ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
0a7de745 1076 kern_channel_ring_t ring)
5ba3f43e
A
1077{
1078#pragma unused(nxprov)
1079 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
cb323159
A
1080 bool found = false;
1081
1082 for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) {
1083 if (pcb->ipsec_netif_rxring[i] == ring) {
1084 pcb->ipsec_netif_rxring[i] = NULL;
1085 VERIFY(!found);
1086 found = true;
1087 }
1088 }
1089 for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) {
1090 if (pcb->ipsec_netif_txring[i] == ring) {
1091 pcb->ipsec_netif_txring[i] = NULL;
1092 VERIFY(!found);
1093 found = true;
1094 }
5ba3f43e 1095 }
cb323159 1096 VERIFY(found);
5ba3f43e
A
1097}
1098
1099static bool
1100ipsec_netif_check_policy(mbuf_t data)
1101{
1102 necp_kernel_policy_result necp_result = 0;
1103 necp_kernel_policy_result_parameter necp_result_parameter = {};
1104 uint32_t necp_matched_policy_id = 0;
1105
1106 // This packet has been marked with IP level policy, do not mark again.
1107 if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
0a7de745 1108 return true;
5ba3f43e
A
1109 }
1110
1111 size_t length = mbuf_pkthdr_len(data);
1112 if (length < sizeof(struct ip)) {
0a7de745 1113 return false;
5ba3f43e
A
1114 }
1115
1116 struct ip *ip = mtod(data, struct ip *);
1117 u_int ip_version = ip->ip_v;
1118 switch (ip_version) {
0a7de745 1119 case 4: {
cb323159 1120 necp_matched_policy_id = necp_ip_output_find_policy_match(data, 0, NULL, NULL,
0a7de745
A
1121 &necp_result, &necp_result_parameter);
1122 break;
1123 }
1124 case 6: {
cb323159 1125 necp_matched_policy_id = necp_ip6_output_find_policy_match(data, 0, NULL, NULL,
0a7de745
A
1126 &necp_result, &necp_result_parameter);
1127 break;
1128 }
1129 default: {
1130 return false;
1131 }
5ba3f43e
A
1132 }
1133
1134 if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
0a7de745 1135 necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
5ba3f43e 1136 /* Drop and flow divert packets should be blocked at the IP layer */
0a7de745 1137 return false;
5ba3f43e
A
1138 }
1139
1140 necp_mark_packet_from_ip(data, necp_matched_policy_id);
0a7de745 1141 return true;
5ba3f43e
A
1142}
1143
1144static errno_t
1145ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
0a7de745 1146 kern_channel_ring_t tx_ring, uint32_t flags)
5ba3f43e
A
1147{
1148#pragma unused(nxprov)
1149#pragma unused(flags)
1150 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1151
1152 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1153
cb323159
A
1154 if (!ipsec_data_move_begin(pcb)) {
1155 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1156 return 0;
1157 }
1158
5ba3f43e
A
1159 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1160
1161 struct kern_channel_ring_stat_increment tx_ring_stats;
1162 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
1163 kern_channel_slot_t tx_pslot = NULL;
1164 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
1165
cb323159 1166 STATS_INC(nifs, NETIF_STATS_TX_SYNC);
5ba3f43e
A
1167
1168 if (tx_slot == NULL) {
1169 // Nothing to write, don't bother signalling
1170 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
cb323159 1171 ipsec_data_move_end(pcb);
5ba3f43e
A
1172 return 0;
1173 }
1174
cb323159
A
1175 if (pcb->ipsec_kpipe_count &&
1176 ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
1177 // Select the corresponding kpipe rx ring
1178 uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(tx_ring);
1179 VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT);
1180 kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];
1181
1182 // Unlock while calling notify
5ba3f43e
A
1183 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1184
1185 // Signal the kernel pipe ring to read
1186 if (rx_ring != NULL) {
1187 kern_channel_notify(rx_ring, 0);
1188 }
cb323159
A
1189
1190 ipsec_data_move_end(pcb);
5ba3f43e
A
1191 return 0;
1192 }
1193
1194 // If we're here, we're injecting into the BSD stack
1195 while (tx_slot != NULL) {
1196 size_t length = 0;
1197 mbuf_t data = NULL;
1198
1199 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
1200
1201 // Advance TX ring
1202 tx_pslot = tx_slot;
1203 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
1204
1205 if (tx_ph == 0) {
1206 continue;
1207 }
1208
1209 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1210 VERIFY(tx_buf != NULL);
1211 uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
1212 VERIFY(tx_baddr != 0);
1213 tx_baddr += kern_buflet_get_data_offset(tx_buf);
1214
1215 bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);
1216
1217 length = MIN(kern_packet_get_data_length(tx_ph),
0a7de745 1218 pcb->ipsec_slot_size);
5ba3f43e
A
1219
1220 if (length > 0) {
1221 errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
1222 if (error == 0) {
1223 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
1224 if (error == 0) {
1225 // Mark packet from policy
1226 uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
1227 necp_mark_packet_from_ip(data, policy_id);
1228
1229 // Check policy with NECP
1230 if (!ipsec_netif_check_policy(data)) {
cb323159
A
1231 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
1232 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
1233 mbuf_freem(data);
1234 data = NULL;
1235 } else {
1236 // Send through encryption
1237 error = ipsec_output(pcb->ipsec_ifp, data);
1238 if (error != 0) {
cb323159 1239 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
5ba3f43e
A
1240 }
1241 }
1242 } else {
cb323159
A
1243 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
1244 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1245 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
1246 mbuf_freem(data);
1247 data = NULL;
1248 }
1249 } else {
cb323159
A
1250 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
1251 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1252 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
1253 }
1254 } else {
cb323159
A
1255 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
1256 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1257 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
1258 }
1259
1260 if (data == NULL) {
cb323159 1261 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
5ba3f43e
A
1262 break;
1263 }
1264
cb323159
A
1265 STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
1266 STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
5ba3f43e
A
1267
1268 tx_ring_stats.kcrsi_slots_transferred++;
1269 tx_ring_stats.kcrsi_bytes_transferred += length;
1270 }
1271
1272 if (tx_pslot) {
1273 kern_channel_advance_slot(tx_ring, tx_pslot);
1274 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1275 (void)kern_channel_reclaim(tx_ring);
1276 }
1277
1278 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
cb323159 1279 ipsec_data_move_end(pcb);
5ba3f43e
A
1280
1281 return 0;
1282}
1283
1284static errno_t
cb323159
A
1285ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1286 kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx)
5ba3f43e
A
1287{
1288#pragma unused(nxprov)
1289 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
5ba3f43e
A
1290 boolean_t more = false;
1291 errno_t rc = 0;
5ba3f43e 1292
cb323159
A
1293 VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0);
1294
5c9f4661
A
1295 /*
1296 * Refill and sync the ring; we may be racing against another thread doing
1297 * an RX sync that also wants to do kr_enter(), and so use the blocking
1298 * variant here.
1299 */
1300 rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
1301 if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
cb323159
A
1302 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n", __func__,
1303 pcb->ipsec_if_xname, ring->ckr_name, rc);
5c9f4661
A
1304 }
1305
1306 (void) kr_enter(ring, TRUE);
1307 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
cb323159
A
1308 if (ring != pcb->ipsec_netif_txring[ring_idx]) {
1309 // ring no longer valid
1310 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1311 kr_exit(ring);
1312 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n", __func__,
1313 pcb->ipsec_if_xname, ring->ckr_name, ring_idx);
1314 return ENXIO;
1315 }
5c9f4661 1316
cb323159 1317 if (pcb->ipsec_kpipe_count) {
5ba3f43e
A
1318 uint32_t tx_available = kern_channel_available_slot_count(ring);
1319 if (pcb->ipsec_netif_txring_size > 0 &&
0a7de745 1320 tx_available >= pcb->ipsec_netif_txring_size - 1) {
5ba3f43e
A
1321 // No room left in tx ring, disable output for now
1322 errno_t error = ifnet_disable_output(pcb->ipsec_ifp);
1323 if (error != 0) {
cb323159 1324 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
5ba3f43e
A
1325 }
1326 }
1327 }
1328
cb323159
A
1329 if (pcb->ipsec_kpipe_count) {
1330 kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];
5ba3f43e
A
1331
1332 // Unlock while calling notify
1333 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1334 // Signal the kernel pipe ring to read
1335 if (rx_ring != NULL) {
1336 kern_channel_notify(rx_ring, 0);
1337 }
5ba3f43e
A
1338 } else {
1339 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1340 }
1341
5c9f4661
A
1342 kr_exit(ring);
1343
0a7de745 1344 return 0;
5ba3f43e
A
1345}
1346
cb323159
A
1347static errno_t
1348ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1349 kern_channel_ring_t ring, __unused uint32_t flags)
1350{
1351 errno_t ret = 0;
1352 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1353
1354 if (!ipsec_data_move_begin(pcb)) {
1355 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1356 return 0;
1357 }
1358
1359 if (ipsec_in_wmm_mode(pcb)) {
1360 for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) {
1361 kern_channel_ring_t nring = pcb->ipsec_netif_txring[i];
1362 ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, nring, flags, i);
1363 if (ret) {
1364 break;
1365 }
1366 }
1367 } else {
1368 ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, 0);
1369 }
1370
1371 ipsec_data_move_end(pcb);
1372 return ret;
1373}
1374
5ba3f43e
A
1375static errno_t
1376ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
0a7de745 1377 kern_channel_ring_t rx_ring, uint32_t flags)
5ba3f43e
A
1378{
1379#pragma unused(nxprov)
1380#pragma unused(flags)
1381 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1382 struct kern_channel_ring_stat_increment rx_ring_stats;
1383
1384 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1385
cb323159
A
1386 if (!ipsec_data_move_begin(pcb)) {
1387 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1388 return 0;
1389 }
1390
5ba3f43e
A
1391 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
1392
1393 // Reclaim user-released slots
1394 (void) kern_channel_reclaim(rx_ring);
1395
cb323159 1396 STATS_INC(nifs, NETIF_STATS_RX_SYNC);
5ba3f43e
A
1397
1398 uint32_t avail = kern_channel_available_slot_count(rx_ring);
1399 if (avail == 0) {
1400 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
cb323159 1401 ipsec_data_move_end(pcb);
5ba3f43e
A
1402 return 0;
1403 }
1404
1405 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
1406 VERIFY(rx_pp != NULL);
1407 bzero(&rx_ring_stats, sizeof(rx_ring_stats));
1408 kern_channel_slot_t rx_pslot = NULL;
1409 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
1410
1411 while (rx_slot != NULL) {
1412 // Check for a waiting packet
1413 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1414 mbuf_t data = pcb->ipsec_input_chain;
1415 if (data == NULL) {
1416 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1417 break;
1418 }
1419
1420 // Allocate rx packet
1421 kern_packet_t rx_ph = 0;
1422 errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
a39ff7e2 1423 if (__improbable(error != 0)) {
cb323159
A
1424 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
1425 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
1426 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1427 break;
1428 }
1429
1430 // Advance waiting packets
cb323159
A
1431 if (pcb->ipsec_input_chain_count > 0) {
1432 pcb->ipsec_input_chain_count--;
1433 }
5ba3f43e
A
1434 pcb->ipsec_input_chain = data->m_nextpkt;
1435 data->m_nextpkt = NULL;
1436 if (pcb->ipsec_input_chain == NULL) {
1437 pcb->ipsec_input_chain_last = NULL;
1438 }
1439 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1440
1441 size_t length = mbuf_pkthdr_len(data);
1442
1443 if (length < sizeof(struct ip)) {
1444 // Flush data
1445 mbuf_freem(data);
1446 kern_pbufpool_free(rx_pp, rx_ph);
cb323159
A
1447 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1448 STATS_INC(nifs, NETIF_STATS_DROP);
1449 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n",
0a7de745 1450 pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
5ba3f43e
A
1451 continue;
1452 }
1453
1454 uint32_t af = 0;
1455 struct ip *ip = mtod(data, struct ip *);
1456 u_int ip_version = ip->ip_v;
1457 switch (ip_version) {
0a7de745
A
1458 case 4: {
1459 af = AF_INET;
1460 break;
1461 }
1462 case 6: {
1463 af = AF_INET6;
1464 break;
1465 }
1466 default: {
cb323159 1467 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
0a7de745
A
1468 pcb->ipsec_ifp->if_xname, ip_version);
1469 break;
1470 }
5ba3f43e
A
1471 }
1472
1473 if (length > rx_pp->pp_buflet_size ||
0a7de745 1474 (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
5ba3f43e
A
1475 // We need to fragment to send up into the netif
1476
1477 u_int32_t fragment_mtu = rx_pp->pp_buflet_size;
1478 if (pcb->ipsec_frag_size_set &&
0a7de745 1479 pcb->ipsec_input_frag_size < rx_pp->pp_buflet_size) {
5ba3f43e
A
1480 fragment_mtu = pcb->ipsec_input_frag_size;
1481 }
1482
1483 mbuf_t fragment_chain = NULL;
1484 switch (af) {
0a7de745
A
1485 case AF_INET: {
1486 // ip_fragment expects the length in host order
1487 ip->ip_len = ntohs(ip->ip_len);
1488
1489 // ip_fragment will modify the original data, don't free
1490 int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
1491 if (fragment_error == 0 && data != NULL) {
1492 fragment_chain = data;
1493 } else {
cb323159
A
1494 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1495 STATS_INC(nifs, NETIF_STATS_DROP);
1496 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
0a7de745
A
1497 pcb->ipsec_ifp->if_xname, length, fragment_error);
1498 }
1499 break;
1500 }
1501 case AF_INET6: {
1502 if (length < sizeof(struct ip6_hdr)) {
1503 mbuf_freem(data);
cb323159
A
1504 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1505 STATS_INC(nifs, NETIF_STATS_DROP);
1506 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
0a7de745
A
1507 pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
1508 } else {
1509 // ip6_do_fragmentation will free the original data on success only
1510 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
5ba3f43e 1511
0a7de745 1512 int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
cb323159 1513 ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid()));
5ba3f43e
A
1514 if (fragment_error == 0 && data != NULL) {
1515 fragment_chain = data;
1516 } else {
5ba3f43e 1517 mbuf_freem(data);
cb323159
A
1518 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1519 STATS_INC(nifs, NETIF_STATS_DROP);
1520 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
0a7de745 1521 pcb->ipsec_ifp->if_xname, length, fragment_error);
5ba3f43e 1522 }
5ba3f43e 1523 }
0a7de745
A
1524 break;
1525 }
1526 default: {
1527 // Cannot fragment unknown families
1528 mbuf_freem(data);
cb323159
A
1529 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1530 STATS_INC(nifs, NETIF_STATS_DROP);
1531 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n",
0a7de745
A
1532 pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
1533 break;
1534 }
5ba3f43e
A
1535 }
1536
1537 if (fragment_chain != NULL) {
1538 // Add fragments to chain before continuing
1539 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
1540 if (pcb->ipsec_input_chain != NULL) {
1541 pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
1542 } else {
1543 pcb->ipsec_input_chain = fragment_chain;
1544 }
cb323159 1545 pcb->ipsec_input_chain_count++;
5ba3f43e
A
1546 while (fragment_chain->m_nextpkt) {
1547 VERIFY(fragment_chain != fragment_chain->m_nextpkt);
1548 fragment_chain = fragment_chain->m_nextpkt;
cb323159 1549 pcb->ipsec_input_chain_count++;
5ba3f43e
A
1550 }
1551 pcb->ipsec_input_chain_last = fragment_chain;
1552 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
1553 }
1554
1555 // Make sure to free unused rx packet
1556 kern_pbufpool_free(rx_pp, rx_ph);
1557
1558 continue;
1559 }
1560
1561 mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1562
1563 // Fillout rx packet
1564 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1565 VERIFY(rx_buf != NULL);
1566 void *rx_baddr = kern_buflet_get_object_address(rx_buf);
1567 VERIFY(rx_baddr != NULL);
1568
1569 // Copy-in data from mbuf to buflet
1570 mbuf_copydata(data, 0, length, (void *)rx_baddr);
0a7de745 1571 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
5ba3f43e
A
1572
1573 // Finalize and attach the packet
1574 error = kern_buflet_set_data_offset(rx_buf, 0);
1575 VERIFY(error == 0);
1576 error = kern_buflet_set_data_length(rx_buf, length);
1577 VERIFY(error == 0);
cb323159 1578 error = kern_packet_set_headroom(rx_ph, 0);
5ba3f43e
A
1579 VERIFY(error == 0);
1580 error = kern_packet_finalize(rx_ph);
1581 VERIFY(error == 0);
1582 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1583 VERIFY(error == 0);
1584
cb323159
A
1585 STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
1586 STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
5ba3f43e
A
1587 bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1588
1589 rx_ring_stats.kcrsi_slots_transferred++;
1590 rx_ring_stats.kcrsi_bytes_transferred += length;
1591
1592 if (!pcb->ipsec_ext_ifdata_stats) {
1593 ifnet_stat_increment_in(pcb->ipsec_ifp, 1, length, 0);
1594 }
1595
1596 mbuf_freem(data);
1597
1598 // Advance ring
1599 rx_pslot = rx_slot;
1600 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1601 }
1602
cb323159
A
1603 for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
1604 struct kern_channel_ring_stat_increment tx_ring_stats;
1605 bzero(&tx_ring_stats, sizeof(tx_ring_stats));
1606 kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
1607 kern_channel_slot_t tx_pslot = NULL;
1608 kern_channel_slot_t tx_slot = NULL;
1609 if (tx_ring == NULL) {
1610 // Net-If TX ring not set up yet, nothing to read
1611 goto done;
1612 }
5ba3f43e 1613
5ba3f43e 1614
cb323159
A
1615 // Unlock ipsec before entering ring
1616 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
5ba3f43e 1617
cb323159 1618 (void)kr_enter(tx_ring, TRUE);
5ba3f43e 1619
cb323159
A
1620 // Lock again after entering and validate
1621 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
5ba3f43e 1622
cb323159
A
1623 if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
1624 goto done;
1625 }
5ba3f43e 1626
cb323159
A
1627 tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
1628 if (tx_slot == NULL) {
1629 // Nothing to read, don't bother signalling
1630 goto done;
5ba3f43e
A
1631 }
1632
cb323159
A
1633 while (rx_slot != NULL && tx_slot != NULL) {
1634 size_t length = 0;
1635 mbuf_t data = NULL;
1636 errno_t error = 0;
1637 uint32_t af;
1638
1639 // Allocate rx packet
1640 kern_packet_t rx_ph = 0;
1641 error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
1642 if (__improbable(error != 0)) {
1643 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
1644 STATS_INC(nifs, NETIF_STATS_DROP);
1645 break;
1646 }
5ba3f43e 1647
cb323159 1648 kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
5ba3f43e 1649
cb323159
A
1650 // Advance TX ring
1651 tx_pslot = tx_slot;
1652 tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
5ba3f43e 1653
cb323159
A
1654 if (tx_ph == 0) {
1655 kern_pbufpool_free(rx_pp, rx_ph);
1656 continue;
1657 }
5ba3f43e 1658
cb323159
A
1659 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1660 VERIFY(tx_buf != NULL);
1661 uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
1662 VERIFY(tx_baddr != 0);
1663 tx_baddr += kern_buflet_get_data_offset(tx_buf);
5ba3f43e 1664
cb323159
A
1665 length = MIN(kern_packet_get_data_length(tx_ph),
1666 pcb->ipsec_slot_size);
5ba3f43e 1667
cb323159
A
1668 // Increment TX stats
1669 tx_ring_stats.kcrsi_slots_transferred++;
1670 tx_ring_stats.kcrsi_bytes_transferred += length;
1671
1672 if (length >= sizeof(struct ip)) {
1673 error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
5ba3f43e 1674 if (error == 0) {
cb323159
A
1675 error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
1676 if (error == 0) {
1677 lck_mtx_lock(&pcb->ipsec_kpipe_decrypt_lock);
1678 struct ip *ip = mtod(data, struct ip *);
1679 u_int ip_version = ip->ip_v;
1680 switch (ip_version) {
1681 case 4: {
1682 af = AF_INET;
1683 ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip);
1684 ip->ip_off = ntohs(ip->ip_off);
1685
1686 if (length < ip->ip_len) {
1687 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
1688 pcb->ipsec_ifp->if_xname, length, ip->ip_len);
1689 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1690 STATS_INC(nifs, NETIF_STATS_DROP);
1691 mbuf_freem(data);
1692 data = NULL;
1693 } else {
1694 data = esp4_input_extended(data, sizeof(struct ip), pcb->ipsec_ifp);
1695 }
1696 break;
5ba3f43e 1697 }
cb323159
A
1698 case 6: {
1699 if (length < sizeof(struct ip6_hdr)) {
1700 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
1701 pcb->ipsec_ifp->if_xname, length);
1702 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1703 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
1704 mbuf_freem(data);
1705 data = NULL;
1706 } else {
cb323159
A
1707 af = AF_INET6;
1708 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1709 const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
1710 if (length < ip6_len) {
1711 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
1712 pcb->ipsec_ifp->if_xname, length, ip6_len);
1713 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1714 STATS_INC(nifs, NETIF_STATS_DROP);
1715 mbuf_freem(data);
1716 data = NULL;
1717 } else {
1718 int offset = sizeof(struct ip6_hdr);
1719 esp6_input_extended(&data, &offset, ip6->ip6_nxt, pcb->ipsec_ifp);
1720 }
5ba3f43e 1721 }
cb323159 1722 break;
5ba3f43e 1723 }
cb323159
A
1724 default: {
1725 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown ip version %u\n",
1726 pcb->ipsec_ifp->if_xname, ip_version);
1727 STATS_INC(nifs, NETIF_STATS_DROP);
1728 mbuf_freem(data);
1729 data = NULL;
1730 break;
1731 }
1732 }
1733 lck_mtx_unlock(&pcb->ipsec_kpipe_decrypt_lock);
1734 } else {
1735 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
1736 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1737 STATS_INC(nifs, NETIF_STATS_DROP);
0a7de745
A
1738 mbuf_freem(data);
1739 data = NULL;
5ba3f43e
A
1740 }
1741 } else {
cb323159
A
1742 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
1743 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1744 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e
A
1745 }
1746 } else {
cb323159
A
1747 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb->ipsec_ifp->if_xname, length);
1748 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1749 STATS_INC(nifs, NETIF_STATS_DROP);
5ba3f43e 1750 }
5ba3f43e 1751
cb323159
A
1752 if (data == NULL) {
1753 // Failed to get decrypted data data
1754 kern_pbufpool_free(rx_pp, rx_ph);
1755 continue;
1756 }
5ba3f43e 1757
cb323159
A
1758 length = mbuf_pkthdr_len(data);
1759 if (length > rx_pp->pp_buflet_size) {
1760 // Flush data
1761 mbuf_freem(data);
1762 kern_pbufpool_free(rx_pp, rx_ph);
1763 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1764 STATS_INC(nifs, NETIF_STATS_DROP);
1765 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
1766 pcb->ipsec_ifp->if_xname, length, rx_pp->pp_buflet_size);
1767 continue;
1768 }
5ba3f43e 1769
cb323159
A
1770 mbuf_pkthdr_setrcvif(data, pcb->ipsec_ifp);
1771
1772 // Fillout rx packet
1773 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1774 VERIFY(rx_buf != NULL);
1775 void *rx_baddr = kern_buflet_get_object_address(rx_buf);
1776 VERIFY(rx_baddr != NULL);
1777
1778 // Copy-in data from mbuf to buflet
1779 mbuf_copydata(data, 0, length, (void *)rx_baddr);
1780 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
1781
1782 // Finalize and attach the packet
1783 error = kern_buflet_set_data_offset(rx_buf, 0);
1784 VERIFY(error == 0);
1785 error = kern_buflet_set_data_length(rx_buf, length);
1786 VERIFY(error == 0);
1787 error = kern_packet_set_link_header_offset(rx_ph, 0);
1788 VERIFY(error == 0);
1789 error = kern_packet_set_network_header_offset(rx_ph, 0);
1790 VERIFY(error == 0);
1791 error = kern_packet_finalize(rx_ph);
1792 VERIFY(error == 0);
1793 error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
1794 VERIFY(error == 0);
1795
1796 STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
1797 STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
1798 bpf_tap_packet_in(pcb->ipsec_ifp, DLT_RAW, rx_ph, NULL, 0);
1799
1800 rx_ring_stats.kcrsi_slots_transferred++;
1801 rx_ring_stats.kcrsi_bytes_transferred += length;
1802
1803 if (!pcb->ipsec_ext_ifdata_stats) {
1804 ifnet_stat_increment_in(pcb->ipsec_ifp, 1, length, 0);
1805 }
5ba3f43e 1806
cb323159 1807 mbuf_freem(data);
5ba3f43e 1808
cb323159
A
1809 rx_pslot = rx_slot;
1810 rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
1811 }
5ba3f43e 1812
cb323159
A
1813done:
1814 if (tx_pslot) {
1815 kern_channel_advance_slot(tx_ring, tx_pslot);
1816 kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
1817 (void)kern_channel_reclaim(tx_ring);
5ba3f43e
A
1818 }
1819
cb323159
A
1820 // Unlock first, then exit ring
1821 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
1822 if (tx_ring != NULL) {
1823 if (tx_pslot != NULL) {
1824 kern_channel_notify(tx_ring, 0);
1825 }
1826 kr_exit(tx_ring);
1827 }
5ba3f43e 1828
cb323159 1829 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
5ba3f43e
A
1830 }
1831
5ba3f43e
A
1832 if (rx_pslot) {
1833 kern_channel_advance_slot(rx_ring, rx_pslot);
1834 kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
1835 }
1836
5ba3f43e 1837
5ba3f43e 1838 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
5ba3f43e 1839
cb323159 1840 ipsec_data_move_end(pcb);
5ba3f43e
A
1841 return 0;
1842}
1843
1844static errno_t
1845ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
0a7de745
A
1846 struct ifnet_init_eparams *init_params,
1847 struct ifnet **ifp)
5ba3f43e
A
1848{
1849 errno_t err;
1850 nexus_controller_t controller = kern_nexus_shared_controller();
1851 struct kern_nexus_net_init net_init;
a39ff7e2 1852 struct kern_pbufpool_init pp_init;
5ba3f43e
A
1853
1854 nexus_name_t provider_name;
1855 snprintf((char *)provider_name, sizeof(provider_name),
0a7de745 1856 "com.apple.netif.%s", pcb->ipsec_if_xname);
5ba3f43e
A
1857
1858 struct kern_nexus_provider_init prov_init = {
1859 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1860 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1861 .nxpi_pre_connect = ipsec_nexus_pre_connect,
1862 .nxpi_connected = ipsec_nexus_connected,
1863 .nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
1864 .nxpi_disconnected = ipsec_nexus_disconnected,
1865 .nxpi_ring_init = ipsec_netif_ring_init,
1866 .nxpi_ring_fini = ipsec_netif_ring_fini,
1867 .nxpi_slot_init = NULL,
1868 .nxpi_slot_fini = NULL,
1869 .nxpi_sync_tx = ipsec_netif_sync_tx,
1870 .nxpi_sync_rx = ipsec_netif_sync_rx,
1871 .nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
1872 };
1873
1874 nexus_attr_t nxa = NULL;
1875 err = kern_nexus_attr_create(&nxa);
1876 IPSEC_IF_VERIFY(err == 0);
1877 if (err != 0) {
cb323159 1878 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
0a7de745 1879 __func__, err);
5ba3f43e
A
1880 goto failed;
1881 }
1882
5c9f4661 1883 uint64_t slot_buffer_size = pcb->ipsec_slot_size;
5ba3f43e
A
1884 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1885 VERIFY(err == 0);
1886
1887 // Reset ring size for netif nexus to limit memory usage
5c9f4661 1888 uint64_t ring_size = pcb->ipsec_netif_ring_size;
5ba3f43e
A
1889 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1890 VERIFY(err == 0);
1891 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1892 VERIFY(err == 0);
1893
cb323159
A
1894 assert(err == 0);
1895
1896 if (ipsec_in_wmm_mode(pcb)) {
1897 os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n",
1898 __func__, pcb->ipsec_if_xname);
1899
1900 init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED;
1901
1902 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_RINGS,
1903 IPSEC_NETIF_WMM_TX_RING_COUNT);
1904 VERIFY(err == 0);
1905 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_RINGS,
1906 IPSEC_NETIF_WMM_RX_RING_COUNT);
1907 VERIFY(err == 0);
1908
1909 err = kern_nexus_attr_set(nxa, NEXUS_ATTR_QMAP, NEXUS_QMAP_TYPE_WMM);
1910 VERIFY(err == 0);
1911 }
1912
5ba3f43e
A
1913 pcb->ipsec_netif_txring_size = ring_size;
1914
0a7de745 1915 bzero(&pp_init, sizeof(pp_init));
a39ff7e2 1916 pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
cb323159
A
1917 pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
1918 // Note: we need more packets than can be held in the tx and rx rings because
1919 // packets can also be in the AQM queue(s)
1920 pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1);
a39ff7e2
A
1921 pp_init.kbi_bufsize = pcb->ipsec_slot_size;
1922 pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
1923 pp_init.kbi_max_frags = 1;
0a7de745 1924 (void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
a39ff7e2 1925 "%s", provider_name);
cb323159
A
1926 pp_init.kbi_ctx = NULL;
1927 pp_init.kbi_ctx_retain = NULL;
1928 pp_init.kbi_ctx_release = NULL;
a39ff7e2 1929
cb323159 1930 err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL);
a39ff7e2 1931 if (err != 0) {
cb323159 1932 os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
a39ff7e2
A
1933 goto failed;
1934 }
1935
5ba3f43e 1936 err = kern_nexus_controller_register_provider(controller,
0a7de745
A
1937 ipsec_nx_dom_prov,
1938 provider_name,
1939 &prov_init,
1940 sizeof(prov_init),
1941 nxa,
1942 &pcb->ipsec_nx.if_provider);
5ba3f43e
A
1943 IPSEC_IF_VERIFY(err == 0);
1944 if (err != 0) {
cb323159 1945 os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
0a7de745 1946 __func__, err);
5ba3f43e
A
1947 goto failed;
1948 }
1949
1950 bzero(&net_init, sizeof(net_init));
1951 net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
1952 net_init.nxneti_flags = 0;
1953 net_init.nxneti_eparams = init_params;
1954 net_init.nxneti_lladdr = NULL;
1955 net_init.nxneti_prepare = ipsec_netif_prepare;
a39ff7e2 1956 net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
5ba3f43e 1957 err = kern_nexus_controller_alloc_net_provider_instance(controller,
0a7de745
A
1958 pcb->ipsec_nx.if_provider,
1959 pcb,
1960 &pcb->ipsec_nx.if_instance,
1961 &net_init,
1962 ifp);
5ba3f43e
A
1963 IPSEC_IF_VERIFY(err == 0);
1964 if (err != 0) {
cb323159 1965 os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
0a7de745 1966 __func__, err);
5ba3f43e 1967 kern_nexus_controller_deregister_provider(controller,
0a7de745 1968 pcb->ipsec_nx.if_provider);
5ba3f43e
A
1969 uuid_clear(pcb->ipsec_nx.if_provider);
1970 goto failed;
1971 }
1972
1973failed:
1974 if (nxa) {
1975 kern_nexus_attr_destroy(nxa);
1976 }
a39ff7e2
A
1977 if (err && pcb->ipsec_netif_pp != NULL) {
1978 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
1979 pcb->ipsec_netif_pp = NULL;
1980 }
0a7de745 1981 return err;
5ba3f43e
A
1982}
1983
1984static void
1985ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
1986{
1987 nexus_controller_t controller = kern_nexus_shared_controller();
0a7de745 1988 errno_t err;
5ba3f43e
A
1989
1990 if (!uuid_is_null(instance)) {
1991 err = kern_nexus_controller_free_provider_instance(controller,
0a7de745 1992 instance);
5ba3f43e 1993 if (err != 0) {
cb323159 1994 os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
0a7de745 1995 __func__, err);
5ba3f43e
A
1996 }
1997 uuid_clear(instance);
1998 }
1999 if (!uuid_is_null(provider)) {
2000 err = kern_nexus_controller_deregister_provider(controller,
0a7de745 2001 provider);
5ba3f43e 2002 if (err != 0) {
cb323159 2003 os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
5ba3f43e
A
2004 }
2005 uuid_clear(provider);
2006 }
2007 return;
2008}
2009
2010static void
a39ff7e2 2011ipsec_nexus_detach(struct ipsec_pcb *pcb)
5ba3f43e 2012{
a39ff7e2 2013 ipsec_nx_t nx = &pcb->ipsec_nx;
5ba3f43e 2014 nexus_controller_t controller = kern_nexus_shared_controller();
0a7de745 2015 errno_t err;
5ba3f43e 2016
cb323159 2017 if (!uuid_is_null(nx->fsw_host)) {
5ba3f43e 2018 err = kern_nexus_ifdetach(controller,
cb323159
A
2019 nx->fsw_instance,
2020 nx->fsw_host);
5ba3f43e 2021 if (err != 0) {
cb323159 2022 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms host failed %d\n",
0a7de745 2023 __func__, err);
5ba3f43e
A
2024 }
2025 }
2026
cb323159 2027 if (!uuid_is_null(nx->fsw_device)) {
5ba3f43e 2028 err = kern_nexus_ifdetach(controller,
cb323159
A
2029 nx->fsw_instance,
2030 nx->fsw_device);
5ba3f43e 2031 if (err != 0) {
cb323159 2032 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
0a7de745 2033 __func__, err);
5ba3f43e
A
2034 }
2035 }
2036
2037 ipsec_detach_provider_and_instance(nx->if_provider,
0a7de745 2038 nx->if_instance);
cb323159
A
2039 ipsec_detach_provider_and_instance(nx->fsw_provider,
2040 nx->fsw_instance);
5ba3f43e 2041
a39ff7e2
A
2042 if (pcb->ipsec_netif_pp != NULL) {
2043 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2044 pcb->ipsec_netif_pp = NULL;
a39ff7e2 2045 }
5ba3f43e
A
2046 memset(nx, 0, sizeof(*nx));
2047}
2048
2049static errno_t
5c9f4661 2050ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
cb323159 2051 const char *type_name,
0a7de745
A
2052 const char *ifname,
2053 uuid_t *provider, uuid_t *instance)
5ba3f43e
A
2054{
2055 nexus_attr_t attr = NULL;
2056 nexus_controller_t controller = kern_nexus_shared_controller();
2057 uuid_t dom_prov;
2058 errno_t err;
2059 struct kern_nexus_init init;
0a7de745 2060 nexus_name_t provider_name;
5ba3f43e 2061
cb323159 2062 err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
0a7de745 2063 &dom_prov);
5ba3f43e
A
2064 IPSEC_IF_VERIFY(err == 0);
2065 if (err != 0) {
cb323159 2066 os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
0a7de745 2067 __func__, type_name, err);
5ba3f43e
A
2068 goto failed;
2069 }
2070
2071 err = kern_nexus_attr_create(&attr);
2072 IPSEC_IF_VERIFY(err == 0);
2073 if (err != 0) {
cb323159 2074 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
0a7de745 2075 __func__, err);
5ba3f43e
A
2076 goto failed;
2077 }
2078
5c9f4661 2079 uint64_t slot_buffer_size = pcb->ipsec_slot_size;
5ba3f43e
A
2080 err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
2081 VERIFY(err == 0);
2082
2083 // Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
5c9f4661 2084 uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
5ba3f43e
A
2085 err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
2086 VERIFY(err == 0);
5c9f4661 2087 uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
5ba3f43e
A
2088 err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
2089 VERIFY(err == 0);
2090
2091 snprintf((char *)provider_name, sizeof(provider_name),
0a7de745 2092 "com.apple.%s.%s", type_name, ifname);
5ba3f43e 2093 err = kern_nexus_controller_register_provider(controller,
0a7de745
A
2094 dom_prov,
2095 provider_name,
2096 NULL,
2097 0,
2098 attr,
2099 provider);
5ba3f43e
A
2100 kern_nexus_attr_destroy(attr);
2101 attr = NULL;
2102 IPSEC_IF_VERIFY(err == 0);
2103 if (err != 0) {
cb323159 2104 os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
0a7de745 2105 __func__, type_name, err);
5ba3f43e
A
2106 goto failed;
2107 }
0a7de745 2108 bzero(&init, sizeof(init));
5ba3f43e
A
2109 init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
2110 err = kern_nexus_controller_alloc_provider_instance(controller,
0a7de745
A
2111 *provider,
2112 NULL,
2113 instance, &init);
5ba3f43e
A
2114 IPSEC_IF_VERIFY(err == 0);
2115 if (err != 0) {
cb323159 2116 os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
0a7de745 2117 __func__, type_name, err);
5ba3f43e 2118 kern_nexus_controller_deregister_provider(controller,
0a7de745 2119 *provider);
5ba3f43e
A
2120 uuid_clear(*provider);
2121 }
2122failed:
0a7de745 2123 return err;
5ba3f43e
A
2124}
2125
2126static errno_t
cb323159 2127ipsec_flowswitch_attach(struct ipsec_pcb *pcb)
5ba3f43e
A
2128{
2129 nexus_controller_t controller = kern_nexus_shared_controller();
2130 errno_t err = 0;
2131 ipsec_nx_t nx = &pcb->ipsec_nx;
2132
cb323159 2133 // Allocate flowswitch
5c9f4661 2134 err = ipsec_create_fs_provider_and_instance(pcb,
cb323159 2135 "flowswitch",
0a7de745 2136 pcb->ipsec_ifp->if_xname,
cb323159
A
2137 &nx->fsw_provider,
2138 &nx->fsw_instance);
5ba3f43e 2139 if (err != 0) {
cb323159 2140 os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
0a7de745 2141 __func__);
5ba3f43e
A
2142 goto failed;
2143 }
2144
cb323159
A
2145 // Attach flowswitch to device port
2146 err = kern_nexus_ifattach(controller, nx->fsw_instance,
0a7de745 2147 NULL, nx->if_instance,
cb323159 2148 FALSE, &nx->fsw_device);
5ba3f43e 2149 if (err != 0) {
cb323159 2150 os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
5ba3f43e
A
2151 goto failed;
2152 }
2153
cb323159
A
2154 // Attach flowswitch to host port
2155 err = kern_nexus_ifattach(controller, nx->fsw_instance,
0a7de745 2156 NULL, nx->if_instance,
cb323159 2157 TRUE, &nx->fsw_host);
5ba3f43e 2158 if (err != 0) {
cb323159 2159 os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms host %d\n", __func__, err);
5ba3f43e
A
2160 goto failed;
2161 }
2162
2163 // Extract the agent UUID and save for later
cb323159
A
2164 struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
2165 if (flowswitch_nx != NULL) {
2166 struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
5ba3f43e
A
2167 if (flowswitch != NULL) {
2168 FSW_RLOCK(flowswitch);
cb323159 2169 uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
5ba3f43e
A
2170 FSW_UNLOCK(flowswitch);
2171 } else {
cb323159 2172 os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n");
5ba3f43e 2173 }
cb323159 2174 nx_release(flowswitch_nx);
5ba3f43e 2175 } else {
cb323159 2176 os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n");
5ba3f43e
A
2177 }
2178
0a7de745 2179 return 0;
5ba3f43e
A
2180
2181failed:
a39ff7e2 2182 ipsec_nexus_detach(pcb);
5ba3f43e
A
2183
2184 errno_t detach_error = 0;
2185 if ((detach_error = ifnet_detach(pcb->ipsec_ifp)) != 0) {
cb323159 2186 panic("ipsec_flowswitch_attach - ifnet_detach failed: %d\n", detach_error);
5ba3f43e
A
2187 /* NOT REACHED */
2188 }
2189
0a7de745 2190 return err;
5ba3f43e
A
2191}
2192
2193#pragma mark Kernel Pipe Nexus
2194
2195static errno_t
cb323159 2196ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb)
5ba3f43e
A
2197{
2198 nexus_attr_t nxa = NULL;
2199 errno_t result;
2200
2201 lck_mtx_lock(&ipsec_lock);
2202 if (ipsec_ncd_refcount++) {
2203 lck_mtx_unlock(&ipsec_lock);
2204 return 0;
2205 }
2206
2207 result = kern_nexus_controller_create(&ipsec_ncd);
2208 if (result) {
cb323159 2209 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
0a7de745 2210 __FUNCTION__, result);
5ba3f43e
A
2211 goto done;
2212 }
2213
2214 uuid_t dom_prov;
cb323159 2215 result = kern_nexus_get_default_domain_provider(
0a7de745 2216 NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
5ba3f43e 2217 if (result) {
cb323159 2218 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
0a7de745 2219 __FUNCTION__, result);
5ba3f43e
A
2220 goto done;
2221 }
2222
2223 struct kern_nexus_provider_init prov_init = {
2224 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
2225 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
2226 .nxpi_pre_connect = ipsec_nexus_pre_connect,
2227 .nxpi_connected = ipsec_nexus_connected,
2228 .nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
2229 .nxpi_disconnected = ipsec_nexus_disconnected,
2230 .nxpi_ring_init = ipsec_kpipe_ring_init,
2231 .nxpi_ring_fini = ipsec_kpipe_ring_fini,
2232 .nxpi_slot_init = NULL,
2233 .nxpi_slot_fini = NULL,
2234 .nxpi_sync_tx = ipsec_kpipe_sync_tx,
2235 .nxpi_sync_rx = ipsec_kpipe_sync_rx,
2236 .nxpi_tx_doorbell = NULL,
2237 };
2238
2239 result = kern_nexus_attr_create(&nxa);
2240 if (result) {
cb323159 2241 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
0a7de745 2242 __FUNCTION__, result);
5ba3f43e
A
2243 goto done;
2244 }
2245
2246 uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
2247 result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
2248 VERIFY(result == 0);
2249
2250 // Reset ring size for kernel pipe nexus to limit memory usage
cb323159
A
2251 // Note: It's better to have less on slots on the kpipe TX ring than the netif
2252 // so back pressure is applied at the AQM layer
2253 uint64_t ring_size =
2254 pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size :
2255 pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
2256 if_ipsec_ring_size;
5ba3f43e
A
2257 result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
2258 VERIFY(result == 0);
cb323159
A
2259
2260 ring_size =
2261 pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size :
2262 pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
2263 if_ipsec_ring_size;
5ba3f43e
A
2264 result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
2265 VERIFY(result == 0);
2266
2267 result = kern_nexus_controller_register_provider(ipsec_ncd,
0a7de745
A
2268 dom_prov,
2269 (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
2270 &prov_init,
2271 sizeof(prov_init),
2272 nxa,
2273 &ipsec_kpipe_uuid);
5ba3f43e 2274 if (result) {
cb323159 2275 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
0a7de745 2276 __FUNCTION__, result);
5ba3f43e
A
2277 goto done;
2278 }
2279
2280done:
2281 if (nxa) {
2282 kern_nexus_attr_destroy(nxa);
2283 }
2284
2285 if (result) {
2286 if (ipsec_ncd) {
2287 kern_nexus_controller_destroy(ipsec_ncd);
2288 ipsec_ncd = NULL;
2289 }
2290 ipsec_ncd_refcount = 0;
2291 }
2292
2293 lck_mtx_unlock(&ipsec_lock);
2294
2295 return result;
2296}
2297
2298static void
2299ipsec_unregister_kernel_pipe_nexus(void)
2300{
2301 lck_mtx_lock(&ipsec_lock);
2302
2303 VERIFY(ipsec_ncd_refcount > 0);
2304
2305 if (--ipsec_ncd_refcount == 0) {
2306 kern_nexus_controller_destroy(ipsec_ncd);
2307 ipsec_ncd = NULL;
2308 }
2309
2310 lck_mtx_unlock(&ipsec_lock);
2311}
2312
cb323159
A
2313/* This structure only holds onto kpipe channels that need to be
2314 * freed in the future, but are cleared from the pcb under lock
2315 */
2316struct ipsec_detached_channels {
2317 int count;
2318 kern_pbufpool_t pp;
2319 uuid_t uuids[IPSEC_IF_MAX_RING_COUNT];
2320};
5ba3f43e 2321
cb323159
A
2322static void
2323ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc)
2324{
2325 LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);
5ba3f43e 2326
cb323159
A
2327 if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
2328 for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) {
2329 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2330 }
2331 dc->count = 0;
2332 return;
2333 }
5ba3f43e 2334
cb323159 2335 dc->count = pcb->ipsec_kpipe_count;
39236c6e 2336
cb323159
A
2337 VERIFY(dc->count >= 0);
2338 VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT);
39236c6e 2339
cb323159
A
2340 for (int i = 0; i < dc->count; i++) {
2341 VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2342 uuid_copy(dc->uuids[i], pcb->ipsec_kpipe_uuid[i]);
2343 uuid_clear(pcb->ipsec_kpipe_uuid[i]);
2344 }
2345 for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) {
2346 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2347 }
5ba3f43e 2348
cb323159
A
2349 if (dc->count) {
2350 VERIFY(pcb->ipsec_kpipe_pp);
5ba3f43e 2351 } else {
cb323159 2352 VERIFY(!pcb->ipsec_kpipe_pp);
5ba3f43e
A
2353 }
2354
cb323159
A
2355 dc->pp = pcb->ipsec_kpipe_pp;
2356
2357 pcb->ipsec_kpipe_pp = NULL;
2358
2359 ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
2360}
2361
2362static void
2363ipsec_free_channels(struct ipsec_detached_channels *dc)
2364{
2365 if (!dc->count) {
2366 return;
5ba3f43e
A
2367 }
2368
cb323159
A
2369 for (int i = 0; i < dc->count; i++) {
2370 errno_t result;
2371 result = kern_nexus_controller_free_provider_instance(ipsec_ncd, dc->uuids[i]);
2372 VERIFY(!result);
2373 }
2374
2375 VERIFY(dc->pp);
2376 kern_pbufpool_destroy(dc->pp);
2377
2378 ipsec_unregister_kernel_pipe_nexus();
2379
2380 memset(dc, 0, sizeof(*dc));
5ba3f43e
A
2381}
2382
2383static errno_t
2384ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
39236c6e 2385{
5ba3f43e 2386 struct kern_nexus_init init;
a39ff7e2 2387 struct kern_pbufpool_init pp_init;
5ba3f43e
A
2388 errno_t result;
2389
d9a64523
A
2390 kauth_cred_t cred = kauth_cred_get();
2391 result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
2392 if (result) {
2393 return result;
2394 }
2395
cb323159
A
2396 VERIFY(pcb->ipsec_kpipe_count);
2397 VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED));
5ba3f43e 2398
cb323159 2399 result = ipsec_register_kernel_pipe_nexus(pcb);
5ba3f43e
A
2400
2401 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2402
cb323159
A
2403 if (result) {
2404 os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n",
2405 __func__, pcb->ipsec_if_xname);
a39ff7e2
A
2406 goto done;
2407 }
2408
cb323159 2409 VERIFY(ipsec_ncd);
5ba3f43e 2410
0a7de745 2411 bzero(&pp_init, sizeof(pp_init));
a39ff7e2 2412 pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
cb323159
A
2413 pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
2414 // Note: We only needs are many packets as can be held in the tx and rx rings
2415 pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count;
a39ff7e2
A
2416 pp_init.kbi_bufsize = pcb->ipsec_slot_size;
2417 pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
2418 pp_init.kbi_max_frags = 1;
2419 pp_init.kbi_flags |= KBIF_QUANTUM;
0a7de745 2420 (void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
a39ff7e2 2421 "com.apple.kpipe.%s", pcb->ipsec_if_xname);
cb323159
A
2422 pp_init.kbi_ctx = NULL;
2423 pp_init.kbi_ctx_retain = NULL;
2424 pp_init.kbi_ctx_release = NULL;
a39ff7e2 2425
cb323159 2426 result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp,
a39ff7e2
A
2427 NULL);
2428 if (result != 0) {
cb323159
A
2429 os_log_error(OS_LOG_DEFAULT, "%s: %s pbufbool create failed, error %d\n",
2430 __func__, pcb->ipsec_if_xname, result);
a39ff7e2
A
2431 goto done;
2432 }
2433
0a7de745 2434 bzero(&init, sizeof(init));
5ba3f43e 2435 init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
a39ff7e2 2436 init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;
5ba3f43e 2437
cb323159
A
2438 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
2439 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
2440 result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
2441 ipsec_kpipe_uuid, pcb, &pcb->ipsec_kpipe_uuid[i], &init);
2442
2443 if (result == 0) {
2444 nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
2445 pid_t pid = pcb->ipsec_kpipe_pid;
2446 if (!pid) {
2447 pid = proc_pid(proc);
2448 }
2449 result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
2450 pcb->ipsec_kpipe_uuid[i], &port,
2451 pid, NULL, NULL, 0, NEXUS_BIND_PID);
2452 }
5ba3f43e 2453
cb323159
A
2454 if (result) {
2455 /* Unwind all of them on error */
2456 for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) {
2457 if (!uuid_is_null(pcb->ipsec_kpipe_uuid[j])) {
2458 kern_nexus_controller_free_provider_instance(ipsec_ncd,
2459 pcb->ipsec_kpipe_uuid[j]);
2460 uuid_clear(pcb->ipsec_kpipe_uuid[j]);
2461 }
2462 }
2463 goto done;
2464 }
2465 }
5ba3f43e
A
2466
2467done:
2468 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
0a7de745 2469
5ba3f43e 2470 if (result) {
a39ff7e2
A
2471 if (pcb->ipsec_kpipe_pp != NULL) {
2472 kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
2473 pcb->ipsec_kpipe_pp = NULL;
2474 }
5ba3f43e 2475 ipsec_unregister_kernel_pipe_nexus();
cb323159
A
2476 } else {
2477 ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
39236c6e 2478 }
0a7de745 2479
5ba3f43e 2480 return result;
39236c6e
A
2481}
2482
5ba3f43e
A
2483#endif // IPSEC_NEXUS
2484
39236c6e
A
2485
2486/* Kernel control functions */
2487
5ba3f43e 2488static inline void
5c9f4661 2489ipsec_free_pcb(struct ipsec_pcb *pcb, bool in_list)
5ba3f43e
A
2490{
2491#if IPSEC_NEXUS
2492 mbuf_freem_list(pcb->ipsec_input_chain);
cb323159 2493 pcb->ipsec_input_chain_count = 0;
5ba3f43e 2494 lck_mtx_destroy(&pcb->ipsec_input_chain_lock, ipsec_lck_grp);
cb323159
A
2495 lck_mtx_destroy(&pcb->ipsec_kpipe_encrypt_lock, ipsec_lck_grp);
2496 lck_mtx_destroy(&pcb->ipsec_kpipe_decrypt_lock, ipsec_lck_grp);
5ba3f43e 2497#endif // IPSEC_NEXUS
cb323159 2498 lck_mtx_destroy(&pcb->ipsec_pcb_data_move_lock, ipsec_lck_grp);
5ba3f43e 2499 lck_rw_destroy(&pcb->ipsec_pcb_lock, ipsec_lck_grp);
5c9f4661
A
2500 if (in_list) {
2501 lck_mtx_lock(&ipsec_lock);
2502 TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
2503 lck_mtx_unlock(&ipsec_lock);
2504 }
5ba3f43e
A
2505 zfree(ipsec_pcb_zone, pcb);
2506}
2507
39236c6e 2508static errno_t
5c9f4661 2509ipsec_ctl_bind(kern_ctl_ref kctlref,
0a7de745
A
2510 struct sockaddr_ctl *sac,
2511 void **unitinfo)
39236c6e 2512{
5ba3f43e
A
2513 struct ipsec_pcb *pcb = zalloc(ipsec_pcb_zone);
2514 memset(pcb, 0, sizeof(*pcb));
39037602 2515
39236c6e 2516 /* Setup the protocol control block */
39236c6e
A
2517 *unitinfo = pcb;
2518 pcb->ipsec_ctlref = kctlref;
2519 pcb->ipsec_unit = sac->sc_unit;
fe8ab488 2520 pcb->ipsec_output_service_class = MBUF_SC_OAM;
5ba3f43e 2521
5c9f4661
A
2522#if IPSEC_NEXUS
2523 pcb->ipsec_use_netif = false;
2524 pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
cb323159
A
2525 pcb->ipsec_netif_ring_size = if_ipsec_ring_size;
2526 pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size;
2527 pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size;
5c9f4661
A
2528#endif // IPSEC_NEXUS
2529
2530 lck_rw_init(&pcb->ipsec_pcb_lock, ipsec_lck_grp, ipsec_lck_attr);
cb323159 2531 lck_mtx_init(&pcb->ipsec_pcb_data_move_lock, ipsec_lck_grp, ipsec_lck_attr);
5c9f4661 2532#if IPSEC_NEXUS
cb323159 2533 pcb->ipsec_input_chain_count = 0;
5c9f4661 2534 lck_mtx_init(&pcb->ipsec_input_chain_lock, ipsec_lck_grp, ipsec_lck_attr);
cb323159
A
2535 lck_mtx_init(&pcb->ipsec_kpipe_encrypt_lock, ipsec_lck_grp, ipsec_lck_attr);
2536 lck_mtx_init(&pcb->ipsec_kpipe_decrypt_lock, ipsec_lck_grp, ipsec_lck_attr);
5c9f4661
A
2537#endif // IPSEC_NEXUS
2538
0a7de745 2539 return 0;
5c9f4661
A
2540}
2541
2542static errno_t
2543ipsec_ctl_connect(kern_ctl_ref kctlref,
0a7de745
A
2544 struct sockaddr_ctl *sac,
2545 void **unitinfo)
5c9f4661
A
2546{
2547 struct ifnet_init_eparams ipsec_init = {};
2548 errno_t result = 0;
2549
2550 if (*unitinfo == NULL) {
2551 (void)ipsec_ctl_bind(kctlref, sac, unitinfo);
2552 }
2553
2554 struct ipsec_pcb *pcb = *unitinfo;
94ff46dc
A
2555 if (pcb == NULL) {
2556 return EINVAL;
2557 }
5c9f4661 2558
5ba3f43e
A
2559 lck_mtx_lock(&ipsec_lock);
2560
2561 /* Find some open interface id */
2562 u_int32_t chosen_unique_id = 1;
2563 struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
2564 if (next_pcb != NULL) {
2565 /* List was not empty, add one to the last item */
2566 chosen_unique_id = next_pcb->ipsec_unique_id + 1;
2567 next_pcb = NULL;
2568
2569 /*
2570 * If this wrapped the id number, start looking at
2571 * the front of the list for an unused id.
2572 */
2573 if (chosen_unique_id == 0) {
2574 /* Find the next unused ID */
2575 chosen_unique_id = 1;
2576 TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
2577 if (next_pcb->ipsec_unique_id > chosen_unique_id) {
2578 /* We found a gap */
2579 break;
2580 }
2581
2582 chosen_unique_id = next_pcb->ipsec_unique_id + 1;
2583 }
2584 }
2585 }
2586
2587 pcb->ipsec_unique_id = chosen_unique_id;
2588
2589 if (next_pcb != NULL) {
2590 TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
2591 } else {
2592 TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
2593 }
2594 lck_mtx_unlock(&ipsec_lock);
2595
2596 snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
2597 snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
cb323159 2598 os_log(OS_LOG_DEFAULT, "ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);
5ba3f43e 2599
39236c6e
A
2600 /* Create the interface */
2601 bzero(&ipsec_init, sizeof(ipsec_init));
2602 ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
0a7de745 2603 ipsec_init.len = sizeof(ipsec_init);
5ba3f43e
A
2604
2605#if IPSEC_NEXUS
5c9f4661
A
2606 if (pcb->ipsec_use_netif) {
2607 ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
2608 } else
5ba3f43e 2609#endif // IPSEC_NEXUS
5c9f4661
A
2610 {
2611 ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
2612 ipsec_init.start = ipsec_start;
2613 }
5ba3f43e 2614 ipsec_init.name = "ipsec";
39236c6e 2615 ipsec_init.unit = pcb->ipsec_unit - 1;
5ba3f43e
A
2616 ipsec_init.uniqueid = pcb->ipsec_unique_name;
2617 ipsec_init.uniqueid_len = strlen(pcb->ipsec_unique_name);
cb323159 2618 ipsec_init.family = IFNET_FAMILY_IPSEC;
39236c6e
A
2619 ipsec_init.type = IFT_OTHER;
2620 ipsec_init.demux = ipsec_demux;
2621 ipsec_init.add_proto = ipsec_add_proto;
2622 ipsec_init.del_proto = ipsec_del_proto;
2623 ipsec_init.softc = pcb;
2624 ipsec_init.ioctl = ipsec_ioctl;
2625 ipsec_init.detach = ipsec_detached;
5ba3f43e
A
2626
2627#if IPSEC_NEXUS
cb323159
A
2628 /* We don't support kpipes without a netif */
2629 if (pcb->ipsec_kpipe_count && !pcb->ipsec_use_netif) {
2630 result = ENOTSUP;
2631 os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - kpipe requires netif: failed %d\n", result);
2632 ipsec_free_pcb(pcb, true);
2633 *unitinfo = NULL;
2634 return result;
2635 }
2636
2637 if (if_ipsec_debug != 0) {
2638 printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u "
2639 "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n",
2640 __func__,
2641 ipsec_init.name, ipsec_init.unit,
2642 pcb->ipsec_use_netif,
2643 pcb->ipsec_kpipe_count,
2644 pcb->ipsec_slot_size,
2645 pcb->ipsec_netif_ring_size,
2646 pcb->ipsec_kpipe_tx_ring_size,
2647 pcb->ipsec_kpipe_rx_ring_size);
2648 }
5c9f4661 2649 if (pcb->ipsec_use_netif) {
cb323159
A
2650 if (pcb->ipsec_kpipe_count) {
2651 result = ipsec_enable_channel(pcb, current_proc());
2652 if (result) {
2653 os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n",
2654 __func__, pcb->ipsec_if_xname);
2655 ipsec_free_pcb(pcb, true);
2656 *unitinfo = NULL;
2657 return result;
2658 }
2659 }
2660
5c9f4661
A
2661 result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
2662 if (result != 0) {
cb323159 2663 os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
5c9f4661
A
2664 ipsec_free_pcb(pcb, true);
2665 *unitinfo = NULL;
2666 return result;
2667 }
5ba3f43e 2668
cb323159 2669 result = ipsec_flowswitch_attach(pcb);
5c9f4661 2670 if (result != 0) {
cb323159
A
2671 os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n", result);
2672 // Do not call ipsec_free_pcb(). We will be attached already, and will be freed later
2673 // in ipsec_detached().
5c9f4661
A
2674 *unitinfo = NULL;
2675 return result;
2676 }
5ba3f43e 2677
5c9f4661
A
2678 /* Attach to bpf */
2679 bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
2680 } else
5ba3f43e 2681#endif // IPSEC_NEXUS
5c9f4661
A
2682 {
2683 result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
2684 if (result != 0) {
cb323159 2685 os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
5c9f4661
A
2686 ipsec_free_pcb(pcb, true);
2687 *unitinfo = NULL;
2688 return result;
2689 }
2690 ipsec_ifnet_set_attrs(pcb->ipsec_ifp);
2691
2692 /* Attach the interface */
2693 result = ifnet_attach(pcb->ipsec_ifp, NULL);
2694 if (result != 0) {
cb323159 2695 os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
5c9f4661
A
2696 ifnet_release(pcb->ipsec_ifp);
2697 ipsec_free_pcb(pcb, true);
2698 *unitinfo = NULL;
0a7de745 2699 return result;
5c9f4661 2700 }
5ba3f43e 2701
5c9f4661
A
2702 /* Attach to bpf */
2703 bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
2704 }
5ba3f43e 2705
c6bf4f31 2706#if IPSEC_NEXUS
cb323159
A
2707 /*
2708 * Mark the data path as ready.
2709 * If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected.
2710 */
2711 if (pcb->ipsec_kpipe_count == 0) {
2712 lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
2713 IPSEC_SET_DATA_PATH_READY(pcb);
2714 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
2715 }
c6bf4f31 2716#endif
cb323159 2717
5ba3f43e
A
2718 /* The interfaces resoures allocated, mark it as running */
2719 ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);
2720
0a7de745 2721 return 0;
39236c6e
A
2722}
2723
2724static errno_t
0a7de745
A
2725ipsec_detach_ip(ifnet_t interface,
2726 protocol_family_t protocol,
2727 socket_t pf_socket)
39236c6e
A
2728{
2729 errno_t result = EPROTONOSUPPORT;
0a7de745 2730
39236c6e
A
2731 /* Attempt a detach */
2732 if (protocol == PF_INET) {
0a7de745
A
2733 struct ifreq ifr;
2734
39236c6e
A
2735 bzero(&ifr, sizeof(ifr));
2736 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
0a7de745
A
2737 ifnet_name(interface), ifnet_unit(interface));
2738
39236c6e 2739 result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
0a7de745
A
2740 } else if (protocol == PF_INET6) {
2741 struct in6_ifreq ifr6;
2742
39236c6e
A
2743 bzero(&ifr6, sizeof(ifr6));
2744 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
0a7de745
A
2745 ifnet_name(interface), ifnet_unit(interface));
2746
39236c6e
A
2747 result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2748 }
0a7de745 2749
39236c6e
A
2750 return result;
2751}
2752
2753static void
0a7de745
A
2754ipsec_remove_address(ifnet_t interface,
2755 protocol_family_t protocol,
2756 ifaddr_t address,
2757 socket_t pf_socket)
39236c6e
A
2758{
2759 errno_t result = 0;
0a7de745 2760
39236c6e
A
2761 /* Attempt a detach */
2762 if (protocol == PF_INET) {
0a7de745
A
2763 struct ifreq ifr;
2764
39236c6e
A
2765 bzero(&ifr, sizeof(ifr));
2766 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
0a7de745 2767 ifnet_name(interface), ifnet_unit(interface));
39236c6e
A
2768 result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2769 if (result != 0) {
cb323159 2770 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed: %d", result);
0a7de745 2771 } else {
39236c6e
A
2772 result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2773 if (result != 0) {
cb323159 2774 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR failed: %d", result);
39236c6e
A
2775 }
2776 }
0a7de745
A
2777 } else if (protocol == PF_INET6) {
2778 struct in6_ifreq ifr6;
2779
39236c6e
A
2780 bzero(&ifr6, sizeof(ifr6));
2781 snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
0a7de745 2782 ifnet_name(interface), ifnet_unit(interface));
39236c6e 2783 result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
0a7de745 2784 sizeof(ifr6.ifr_addr));
39236c6e 2785 if (result != 0) {
cb323159 2786 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed (v6): %d",
0a7de745
A
2787 result);
2788 } else {
39236c6e
A
2789 result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2790 if (result != 0) {
cb323159 2791 os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
0a7de745 2792 result);
39236c6e
A
2793 }
2794 }
2795 }
2796}
2797
2798static void
0a7de745
A
2799ipsec_cleanup_family(ifnet_t interface,
2800 protocol_family_t protocol)
39236c6e 2801{
0a7de745
A
2802 errno_t result = 0;
2803 socket_t pf_socket = NULL;
2804 ifaddr_t *addresses = NULL;
2805 int i;
2806
39236c6e 2807 if (protocol != PF_INET && protocol != PF_INET6) {
cb323159 2808 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - invalid protocol family %d\n", protocol);
39236c6e
A
2809 return;
2810 }
0a7de745 2811
39236c6e
A
2812 /* Create a socket for removing addresses and detaching the protocol */
2813 result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2814 if (result != 0) {
0a7de745 2815 if (result != EAFNOSUPPORT) {
cb323159 2816 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - failed to create %s socket: %d\n",
0a7de745
A
2817 protocol == PF_INET ? "IP" : "IPv6", result);
2818 }
39236c6e
A
2819 goto cleanup;
2820 }
0a7de745 2821
39236c6e
A
2822 /* always set SS_PRIV, we want to close and detach regardless */
2823 sock_setpriv(pf_socket, 1);
0a7de745 2824
39236c6e
A
2825 result = ipsec_detach_ip(interface, protocol, pf_socket);
2826 if (result == 0 || result == ENXIO) {
2827 /* We are done! We either detached or weren't attached. */
2828 goto cleanup;
0a7de745 2829 } else if (result != EBUSY) {
39236c6e 2830 /* Uh, not really sure what happened here... */
cb323159 2831 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
39236c6e
A
2832 goto cleanup;
2833 }
0a7de745 2834
39236c6e
A
2835 /*
2836 * At this point, we received an EBUSY error. This means there are
2837 * addresses attached. We should detach them and then try again.
2838 */
2839 result = ifnet_get_address_list_family(interface, &addresses, protocol);
2840 if (result != 0) {
cb323159 2841 os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
0a7de745
A
2842 ifnet_name(interface), ifnet_unit(interface),
2843 protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
39236c6e
A
2844 goto cleanup;
2845 }
0a7de745 2846
39236c6e
A
2847 for (i = 0; addresses[i] != 0; i++) {
2848 ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
2849 }
2850 ifnet_free_address_list(addresses);
2851 addresses = NULL;
0a7de745 2852
39236c6e
A
2853 /*
2854 * The addresses should be gone, we should try the remove again.
2855 */
2856 result = ipsec_detach_ip(interface, protocol, pf_socket);
2857 if (result != 0 && result != ENXIO) {
cb323159 2858 os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
39236c6e 2859 }
0a7de745 2860
39236c6e 2861cleanup:
0a7de745 2862 if (pf_socket != NULL) {
39236c6e 2863 sock_close(pf_socket);
0a7de745
A
2864 }
2865
2866 if (addresses != NULL) {
39236c6e 2867 ifnet_free_address_list(addresses);
0a7de745 2868 }
39236c6e
A
2869}
2870
2871static errno_t
0a7de745
A
2872ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref,
2873 __unused u_int32_t unit,
2874 void *unitinfo)
39236c6e 2875{
5ba3f43e
A
2876 struct ipsec_pcb *pcb = unitinfo;
2877 ifnet_t ifp = NULL;
2878 errno_t result = 0;
39037602 2879
5ba3f43e 2880 if (pcb == NULL) {
39037602 2881 return EINVAL;
5ba3f43e
A
2882 }
2883
cb323159
A
2884 /* Wait until all threads in the data paths are done. */
2885 ipsec_wait_data_move_drain(pcb);
2886
5ba3f43e
A
2887#if IPSEC_NEXUS
2888 // Tell the nexus to stop all rings
2889 if (pcb->ipsec_netif_nexus != NULL) {
2890 kern_nexus_stop(pcb->ipsec_netif_nexus);
2891 }
2892#endif // IPSEC_NEXUS
2893
2894 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
2895
2896#if IPSEC_NEXUS
cb323159
A
2897 if (if_ipsec_debug != 0) {
2898 printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n",
2899 pcb->ipsec_if_xname, pcb->ipsec_unique_name);
2900 }
2901
2902 struct ipsec_detached_channels dc;
2903 ipsec_detach_channels(pcb, &dc);
5ba3f43e 2904#endif // IPSEC_NEXUS
39037602 2905
39236c6e 2906 pcb->ipsec_ctlref = NULL;
5ba3f43e 2907
5c9f4661
A
2908 ifp = pcb->ipsec_ifp;
2909 if (ifp != NULL) {
2910#if IPSEC_NEXUS
2911 if (pcb->ipsec_netif_nexus != NULL) {
2912 /*
2913 * Quiesce the interface and flush any pending outbound packets.
2914 */
2915 if_down(ifp);
2916
2917 /* Increment refcnt, but detach interface */
2918 ifnet_incr_iorefcnt(ifp);
2919 if ((result = ifnet_detach(ifp)) != 0) {
2920 panic("ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
2921 /* NOT REACHED */
2922 }
5ba3f43e 2923
5c9f4661
A
2924 /*
2925 * We want to do everything in our power to ensure that the interface
2926 * really goes away when the socket is closed. We must remove IP/IPv6
2927 * addresses and detach the protocols. Finally, we can remove and
2928 * release the interface.
2929 */
2930 key_delsp_for_ipsec_if(ifp);
5ba3f43e 2931
5c9f4661
A
2932 ipsec_cleanup_family(ifp, AF_INET);
2933 ipsec_cleanup_family(ifp, AF_INET6);
5ba3f43e 2934
5c9f4661
A
2935 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2936
cb323159
A
2937 ipsec_free_channels(&dc);
2938
a39ff7e2 2939 ipsec_nexus_detach(pcb);
5c9f4661
A
2940
2941 /* Decrement refcnt to finish detaching and freeing */
2942 ifnet_decr_iorefcnt(ifp);
2943 } else
2944#endif // IPSEC_NEXUS
2945 {
2946 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
5ba3f43e
A
2947
2948#if IPSEC_NEXUS
cb323159 2949 ipsec_free_channels(&dc);
5ba3f43e
A
2950#endif // IPSEC_NEXUS
2951
5c9f4661
A
2952 /*
2953 * We want to do everything in our power to ensure that the interface
2954 * really goes away when the socket is closed. We must remove IP/IPv6
2955 * addresses and detach the protocols. Finally, we can remove and
2956 * release the interface.
2957 */
2958 key_delsp_for_ipsec_if(ifp);
2959
2960 ipsec_cleanup_family(ifp, AF_INET);
2961 ipsec_cleanup_family(ifp, AF_INET6);
2962
2963 /*
2964 * Detach now; ipsec_detach() will be called asynchronously once
2965 * the I/O reference count drops to 0. There we will invoke
2966 * ifnet_release().
2967 */
2968 if ((result = ifnet_detach(ifp)) != 0) {
cb323159 2969 os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
5c9f4661
A
2970 }
2971 }
2972 } else {
2973 // Bound, but not connected
2974 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
2975 ipsec_free_pcb(pcb, false);
2976 }
0a7de745 2977
39236c6e
A
2978 return 0;
2979}
2980
2981static errno_t
0a7de745
A
2982ipsec_ctl_send(__unused kern_ctl_ref kctlref,
2983 __unused u_int32_t unit,
2984 __unused void *unitinfo,
2985 mbuf_t m,
2986 __unused int flags)
39236c6e 2987{
0a7de745
A
2988 /* Receive messages from the control socket. Currently unused. */
2989 mbuf_freem(m);
39236c6e
A
2990 return 0;
2991}
2992
2993static errno_t
0a7de745
A
2994ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,
2995 __unused u_int32_t unit,
2996 void *unitinfo,
2997 int opt,
2998 void *data,
2999 size_t len)
39236c6e 3000{
0a7de745 3001 errno_t result = 0;
94ff46dc
A
3002 struct ipsec_pcb *pcb = unitinfo;
3003 if (pcb == NULL) {
3004 return EINVAL;
3005 }
0a7de745 3006
39236c6e
A
3007 /* check for privileges for privileged options */
3008 switch (opt) {
0a7de745
A
3009 case IPSEC_OPT_FLAGS:
3010 case IPSEC_OPT_EXT_IFDATA_STATS:
3011 case IPSEC_OPT_SET_DELEGATE_INTERFACE:
3012 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
3013 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
3014 return EPERM;
3015 }
3016 break;
39236c6e 3017 }
0a7de745 3018
39236c6e 3019 switch (opt) {
cb323159 3020 case IPSEC_OPT_FLAGS: {
0a7de745
A
3021 if (len != sizeof(u_int32_t)) {
3022 result = EMSGSIZE;
3023 } else {
cb323159 3024 pcb->ipsec_external_flags = *(u_int32_t *)data;
0a7de745
A
3025 }
3026 break;
cb323159 3027 }
0a7de745 3028
cb323159 3029 case IPSEC_OPT_EXT_IFDATA_STATS: {
0a7de745
A
3030 if (len != sizeof(int)) {
3031 result = EMSGSIZE;
39236c6e 3032 break;
0a7de745
A
3033 }
3034 if (pcb->ipsec_ifp == NULL) {
3035 // Only can set after connecting
3036 result = EINVAL;
39236c6e 3037 break;
0a7de745
A
3038 }
3039 pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
3040 break;
cb323159 3041 }
0a7de745
A
3042
3043 case IPSEC_OPT_INC_IFDATA_STATS_IN:
3044 case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
3045 struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;
3046
3047 if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
3048 result = EINVAL;
39236c6e
A
3049 break;
3050 }
0a7de745
A
3051 if (pcb->ipsec_ifp == NULL) {
3052 // Only can set after connecting
3053 result = EINVAL;
39236c6e
A
3054 break;
3055 }
0a7de745
A
3056 if (!pcb->ipsec_ext_ifdata_stats) {
3057 result = EINVAL;
fe8ab488
A
3058 break;
3059 }
0a7de745
A
3060 if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) {
3061 ifnet_stat_increment_in(pcb->ipsec_ifp, utsp->utsp_packets,
3062 utsp->utsp_bytes, utsp->utsp_errors);
3063 } else {
3064 ifnet_stat_increment_out(pcb->ipsec_ifp, utsp->utsp_packets,
3065 utsp->utsp_bytes, utsp->utsp_errors);
3066 }
3067 break;
3068 }
5ba3f43e 3069
0a7de745 3070 case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
cb323159
A
3071 ifnet_t del_ifp = NULL;
3072 char name[IFNAMSIZ];
0a7de745
A
3073
3074 if (len > IFNAMSIZ - 1) {
3075 result = EMSGSIZE;
5ba3f43e
A
3076 break;
3077 }
0a7de745
A
3078 if (pcb->ipsec_ifp == NULL) {
3079 // Only can set after connecting
3080 result = EINVAL;
3081 break;
3082 }
cb323159 3083 if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */
0a7de745
A
3084 bcopy(data, name, len);
3085 name[len] = 0;
3086 result = ifnet_find_by_name(name, &del_ifp);
3087 }
3088 if (result == 0) {
cb323159 3089 os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
0a7de745
A
3090 __func__, pcb->ipsec_ifp->if_xname,
3091 del_ifp ? del_ifp->if_xname : "NULL");
5ba3f43e 3092
0a7de745
A
3093 result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
3094 if (del_ifp) {
3095 ifnet_release(del_ifp);
a39ff7e2 3096 }
0a7de745
A
3097 }
3098 break;
3099 }
a39ff7e2 3100
0a7de745
A
3101 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3102 if (len != sizeof(int)) {
3103 result = EMSGSIZE;
3104 break;
3105 }
3106 if (pcb->ipsec_ifp == NULL) {
3107 // Only can set after connecting
3108 result = EINVAL;
5ba3f43e
A
3109 break;
3110 }
0a7de745
A
3111 mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
3112 if (output_service_class == MBUF_SC_UNSPEC) {
3113 pcb->ipsec_output_service_class = MBUF_SC_OAM;
3114 } else {
3115 pcb->ipsec_output_service_class = output_service_class;
3116 }
cb323159 3117 os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
0a7de745
A
3118 __func__, pcb->ipsec_ifp->if_xname,
3119 pcb->ipsec_output_service_class);
3120 break;
3121 }
5ba3f43e 3122
0a7de745
A
3123#if IPSEC_NEXUS
3124 case IPSEC_OPT_ENABLE_CHANNEL: {
3125 if (len != sizeof(int)) {
3126 result = EMSGSIZE;
5ba3f43e
A
3127 break;
3128 }
cb323159
A
3129 if (pcb->ipsec_ifp != NULL) {
3130 // Only can set before connecting
0a7de745 3131 result = EINVAL;
5c9f4661
A
3132 break;
3133 }
cb323159
A
3134 if ((*(int *)data) != 0 &&
3135 (*(int *)data) != 1 &&
3136 (*(int *)data) != IPSEC_IF_WMM_RING_COUNT) {
3137 result = EINVAL;
3138 break;
0a7de745 3139 }
cb323159
A
3140 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3141 pcb->ipsec_kpipe_count = *(int *)data;
3142 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3143 break;
3144 }
3145
3146 case IPSEC_OPT_CHANNEL_BIND_PID: {
3147 if (len != sizeof(pid_t)) {
3148 result = EMSGSIZE;
3149 break;
3150 }
3151 if (pcb->ipsec_ifp != NULL) {
3152 // Only can set before connecting
3153 result = EINVAL;
3154 break;
3155 }
3156 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3157 pcb->ipsec_kpipe_pid = *(pid_t *)data;
3158 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
0a7de745
A
3159 break;
3160 }
3161
3162 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3163 if (len != sizeof(int)) {
3164 result = EMSGSIZE;
5c9f4661
A
3165 break;
3166 }
0a7de745
A
3167 if (pcb->ipsec_ifp == NULL) {
3168 // Only can set after connecting
3169 result = EINVAL;
5c9f4661
A
3170 break;
3171 }
cb323159 3172 if (!if_is_fsw_transport_netagent_enabled()) {
0a7de745 3173 result = ENOTSUP;
5c9f4661
A
3174 break;
3175 }
cb323159 3176 if (uuid_is_null(pcb->ipsec_nx.fsw_agent)) {
0a7de745 3177 result = ENOENT;
5c9f4661
A
3178 break;
3179 }
3180
cb323159
A
3181 uint32_t flags = netagent_get_flags(pcb->ipsec_nx.fsw_agent);
3182
0a7de745 3183 if (*(int *)data) {
cb323159
A
3184 flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
3185 NETAGENT_FLAG_NEXUS_LISTENER);
3186 result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
0a7de745
A
3187 pcb->ipsec_needs_netagent = true;
3188 } else {
3189 pcb->ipsec_needs_netagent = false;
cb323159
A
3190 flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
3191 NETAGENT_FLAG_NEXUS_LISTENER);
3192 result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
0a7de745
A
3193 }
3194 break;
3195 }
3196
3197 case IPSEC_OPT_INPUT_FRAG_SIZE: {
3198 if (len != sizeof(u_int32_t)) {
3199 result = EMSGSIZE;
3200 break;
3201 }
3202 u_int32_t input_frag_size = *(u_int32_t *)data;
3203 if (input_frag_size <= sizeof(struct ip6_hdr)) {
3204 pcb->ipsec_frag_size_set = FALSE;
3205 pcb->ipsec_input_frag_size = 0;
3206 } else {
0a7de745
A
3207 pcb->ipsec_frag_size_set = TRUE;
3208 pcb->ipsec_input_frag_size = input_frag_size;
3209 }
3210 break;
3211 }
3212 case IPSEC_OPT_ENABLE_NETIF: {
3213 if (len != sizeof(int)) {
3214 result = EMSGSIZE;
3215 break;
3216 }
3217 if (pcb->ipsec_ifp != NULL) {
3218 // Only can set before connecting
3219 result = EINVAL;
3220 break;
3221 }
3222 lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
3223 pcb->ipsec_use_netif = !!(*(int *)data);
3224 lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
3225 break;
3226 }
3227 case IPSEC_OPT_SLOT_SIZE: {
3228 if (len != sizeof(u_int32_t)) {
3229 result = EMSGSIZE;
3230 break;
3231 }
3232 if (pcb->ipsec_ifp != NULL) {
3233 // Only can set before connecting
3234 result = EINVAL;
39236c6e 3235 break;
0a7de745
A
3236 }
3237 u_int32_t slot_size = *(u_int32_t *)data;
3238 if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
3239 slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
3240 return EINVAL;
3241 }
3242 pcb->ipsec_slot_size = slot_size;
cb323159
A
3243 if (if_ipsec_debug != 0) {
3244 printf("%s: IPSEC_OPT_SLOT_SIZE %u\n", __func__, slot_size);
3245 }
0a7de745 3246 break;
39236c6e 3247 }
0a7de745
A
3248 case IPSEC_OPT_NETIF_RING_SIZE: {
3249 if (len != sizeof(u_int32_t)) {
3250 result = EMSGSIZE;
3251 break;
3252 }
3253 if (pcb->ipsec_ifp != NULL) {
3254 // Only can set before connecting
3255 result = EINVAL;
3256 break;
3257 }
3258 u_int32_t ring_size = *(u_int32_t *)data;
3259 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3260 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3261 return EINVAL;
3262 }
3263 pcb->ipsec_netif_ring_size = ring_size;
cb323159
A
3264 if (if_ipsec_debug != 0) {
3265 printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n", __func__, ring_size);
3266 }
0a7de745
A
3267 break;
3268 }
3269 case IPSEC_OPT_TX_FSW_RING_SIZE: {
3270 if (len != sizeof(u_int32_t)) {
3271 result = EMSGSIZE;
3272 break;
3273 }
3274 if (pcb->ipsec_ifp != NULL) {
3275 // Only can set before connecting
3276 result = EINVAL;
3277 break;
3278 }
3279 u_int32_t ring_size = *(u_int32_t *)data;
3280 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3281 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3282 return EINVAL;
3283 }
3284 pcb->ipsec_tx_fsw_ring_size = ring_size;
cb323159
A
3285 if (if_ipsec_debug != 0) {
3286 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3287 }
0a7de745
A
3288 break;
3289 }
3290 case IPSEC_OPT_RX_FSW_RING_SIZE: {
3291 if (len != sizeof(u_int32_t)) {
3292 result = EMSGSIZE;
3293 break;
3294 }
3295 if (pcb->ipsec_ifp != NULL) {
3296 // Only can set before connecting
3297 result = EINVAL;
3298 break;
3299 }
3300 u_int32_t ring_size = *(u_int32_t *)data;
3301 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3302 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3303 return EINVAL;
3304 }
3305 pcb->ipsec_rx_fsw_ring_size = ring_size;
cb323159
A
3306 if (if_ipsec_debug != 0) {
3307 printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
3308 }
3309 break;
3310 }
3311 case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3312 if (len != sizeof(u_int32_t)) {
3313 result = EMSGSIZE;
3314 break;
3315 }
3316 if (pcb->ipsec_ifp != NULL) {
3317 // Only can set before connecting
3318 result = EINVAL;
3319 break;
3320 }
3321 u_int32_t ring_size = *(u_int32_t *)data;
3322 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3323 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3324 return EINVAL;
3325 }
3326 pcb->ipsec_kpipe_tx_ring_size = ring_size;
3327 if (if_ipsec_debug != 0) {
3328 printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n", __func__, ring_size);
3329 }
3330 break;
3331 }
3332 case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3333 if (len != sizeof(u_int32_t)) {
3334 result = EMSGSIZE;
3335 break;
3336 }
3337 if (pcb->ipsec_ifp != NULL) {
3338 // Only can set before connecting
3339 result = EINVAL;
3340 break;
3341 }
3342 u_int32_t ring_size = *(u_int32_t *)data;
3343 if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
3344 ring_size > IPSEC_IF_MAX_RING_SIZE) {
3345 return EINVAL;
3346 }
3347 pcb->ipsec_kpipe_rx_ring_size = ring_size;
3348 if (if_ipsec_debug != 0) {
3349 printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n", __func__, ring_size);
3350 }
0a7de745
A
3351 break;
3352 }
3353
3354#endif // IPSEC_NEXUS
3355
cb323159 3356 default: {
0a7de745
A
3357 result = ENOPROTOOPT;
3358 break;
3359 }
cb323159 3360 }
0a7de745 3361
39236c6e
A
3362 return result;
3363}
3364
3365static errno_t
5ba3f43e 3366ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
0a7de745
A
3367 __unused u_int32_t unit,
3368 void *unitinfo,
3369 int opt,
3370 void *data,
3371 size_t *len)
39236c6e 3372{
5ba3f43e 3373 errno_t result = 0;
94ff46dc
A
3374 struct ipsec_pcb *pcb = unitinfo;
3375 if (pcb == NULL) {
3376 return EINVAL;
3377 }
0a7de745 3378
39236c6e 3379 switch (opt) {
0a7de745
A
3380 case IPSEC_OPT_FLAGS: {
3381 if (*len != sizeof(u_int32_t)) {
3382 result = EMSGSIZE;
3383 } else {
cb323159 3384 *(u_int32_t *)data = pcb->ipsec_external_flags;
5ba3f43e 3385 }
0a7de745
A
3386 break;
3387 }
3388
3389 case IPSEC_OPT_EXT_IFDATA_STATS: {
3390 if (*len != sizeof(int)) {
3391 result = EMSGSIZE;
3392 } else {
3393 *(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
5ba3f43e 3394 }
0a7de745
A
3395 break;
3396 }
3397
3398 case IPSEC_OPT_IFNAME: {
3399 if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) {
3400 result = EMSGSIZE;
3401 } else {
3402 if (pcb->ipsec_ifp == NULL) {
3403 // Only can get after connecting
3404 result = EINVAL;
3405 break;
5ba3f43e 3406 }
4ba76501 3407 *len = scnprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
5ba3f43e 3408 }
0a7de745
A
3409 break;
3410 }
3411
3412 case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
3413 if (*len != sizeof(int)) {
3414 result = EMSGSIZE;
3415 } else {
3416 *(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
fe8ab488 3417 }
0a7de745
A
3418 break;
3419 }
5ba3f43e
A
3420
3421#if IPSEC_NEXUS
a39ff7e2 3422
0a7de745
A
3423 case IPSEC_OPT_ENABLE_CHANNEL: {
3424 if (*len != sizeof(int)) {
3425 result = EMSGSIZE;
3426 } else {
3427 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
cb323159
A
3428 *(int *)data = pcb->ipsec_kpipe_count;
3429 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3430 }
3431 break;
3432 }
3433
3434 case IPSEC_OPT_CHANNEL_BIND_PID: {
3435 if (*len != sizeof(pid_t)) {
3436 result = EMSGSIZE;
3437 } else {
3438 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
3439 *(pid_t *)data = pcb->ipsec_kpipe_pid;
0a7de745 3440 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
a39ff7e2 3441 }
0a7de745
A
3442 break;
3443 }
a39ff7e2 3444
0a7de745
A
3445 case IPSEC_OPT_ENABLE_FLOWSWITCH: {
3446 if (*len != sizeof(int)) {
3447 result = EMSGSIZE;
3448 } else {
cb323159 3449 *(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.fsw_agent);
a39ff7e2 3450 }
0a7de745
A
3451 break;
3452 }
a39ff7e2 3453
0a7de745
A
3454 case IPSEC_OPT_ENABLE_NETIF: {
3455 if (*len != sizeof(int)) {
3456 result = EMSGSIZE;
3457 } else {
5ba3f43e 3458 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
0a7de745 3459 *(int *)data = !!pcb->ipsec_use_netif;
5ba3f43e 3460 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
5ba3f43e 3461 }
0a7de745
A
3462 break;
3463 }
5ba3f43e 3464
0a7de745
A
3465 case IPSEC_OPT_GET_CHANNEL_UUID: {
3466 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
cb323159 3467 if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
0a7de745 3468 result = ENXIO;
cb323159 3469 } else if (*len != sizeof(uuid_t) * pcb->ipsec_kpipe_count) {
0a7de745
A
3470 result = EMSGSIZE;
3471 } else {
cb323159
A
3472 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
3473 uuid_copy(((uuid_t *)data)[i], pcb->ipsec_kpipe_uuid[i]);
3474 }
5ba3f43e 3475 }
0a7de745
A
3476 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3477 break;
3478 }
3479
3480 case IPSEC_OPT_INPUT_FRAG_SIZE: {
3481 if (*len != sizeof(u_int32_t)) {
3482 result = EMSGSIZE;
3483 } else {
3484 *(u_int32_t *)data = pcb->ipsec_input_frag_size;
5c9f4661 3485 }
0a7de745
A
3486 break;
3487 }
3488 case IPSEC_OPT_SLOT_SIZE: {
3489 if (*len != sizeof(u_int32_t)) {
3490 result = EMSGSIZE;
3491 } else {
3492 *(u_int32_t *)data = pcb->ipsec_slot_size;
5c9f4661 3493 }
0a7de745
A
3494 break;
3495 }
3496 case IPSEC_OPT_NETIF_RING_SIZE: {
3497 if (*len != sizeof(u_int32_t)) {
3498 result = EMSGSIZE;
3499 } else {
3500 *(u_int32_t *)data = pcb->ipsec_netif_ring_size;
5c9f4661 3501 }
0a7de745
A
3502 break;
3503 }
3504 case IPSEC_OPT_TX_FSW_RING_SIZE: {
3505 if (*len != sizeof(u_int32_t)) {
3506 result = EMSGSIZE;
3507 } else {
3508 *(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
3509 }
3510 break;
3511 }
3512 case IPSEC_OPT_RX_FSW_RING_SIZE: {
3513 if (*len != sizeof(u_int32_t)) {
3514 result = EMSGSIZE;
3515 } else {
3516 *(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
5c9f4661 3517 }
0a7de745
A
3518 break;
3519 }
cb323159
A
3520 case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
3521 if (*len != sizeof(u_int32_t)) {
3522 result = EMSGSIZE;
3523 } else {
3524 *(u_int32_t *)data = pcb->ipsec_kpipe_tx_ring_size;
3525 }
3526 break;
3527 }
3528 case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
3529 if (*len != sizeof(u_int32_t)) {
3530 result = EMSGSIZE;
3531 } else {
3532 *(u_int32_t *)data = pcb->ipsec_kpipe_rx_ring_size;
3533 }
3534 break;
3535 }
5c9f4661 3536
5ba3f43e
A
3537#endif // IPSEC_NEXUS
3538
0a7de745
A
3539 default: {
3540 result = ENOPROTOOPT;
3541 break;
3542 }
39236c6e 3543 }
0a7de745 3544
39236c6e
A
3545 return result;
3546}
3547
3548/* Network Interface functions */
3549static errno_t
5ba3f43e 3550ipsec_output(ifnet_t interface,
0a7de745 3551 mbuf_t data)
39236c6e 3552{
5ba3f43e 3553 struct ipsec_pcb *pcb = ifnet_softc(interface);
0a7de745
A
3554 struct ipsec_output_state ipsec_state;
3555 struct route ro;
3556 struct route_in6 ro6;
3557 int length;
3558 struct ip *ip;
3559 struct ip6_hdr *ip6;
3560 struct ip_out_args ipoa;
3561 struct ip6_out_args ip6oa;
3562 int error = 0;
3563 u_int ip_version = 0;
3564 int flags = 0;
3565 struct flowadv *adv = NULL;
3566
fe8ab488
A
3567 // Make sure this packet isn't looping through the interface
3568 if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
5ba3f43e 3569 error = EINVAL;
fe8ab488
A
3570 goto ipsec_output_err;
3571 }
0a7de745 3572
fe8ab488
A
3573 // Mark the interface so NECP can evaluate tunnel policy
3574 necp_mark_packet_from_interface(data, interface);
0a7de745
A
3575
3576 ip = mtod(data, struct ip *);
3577 ip_version = ip->ip_v;
3578
3579 switch (ip_version) {
3580 case 4: {
5c9f4661 3581#if IPSEC_NEXUS
0a7de745 3582 if (!pcb->ipsec_use_netif)
5c9f4661 3583#endif // IPSEC_NEXUS
0a7de745
A
3584 {
3585 int af = AF_INET;
3586 bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
3587 }
3588
3589 /* Apply encryption */
3590 memset(&ipsec_state, 0, sizeof(ipsec_state));
3591 ipsec_state.m = data;
3592 ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
3593 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
3594
3595 error = ipsec4_interface_output(&ipsec_state, interface);
3596 /* Tunneled in IPv6 - packet is gone */
3597 if (error == 0 && ipsec_state.tunneled == 6) {
3598 goto done;
3599 }
3600
3601 data = ipsec_state.m;
3602 if (error || data == NULL) {
3603 if (error) {
cb323159 3604 os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec4_output error %d.\n", error);
5c9f4661 3605 }
0a7de745
A
3606 goto ipsec_output_err;
3607 }
5c9f4661 3608
0a7de745
A
3609 /* Set traffic class, set flow */
3610 m_set_service_class(data, pcb->ipsec_output_service_class);
3611 data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
3612 data->m_pkthdr.pkt_flowid = interface->if_flowhash;
3613 data->m_pkthdr.pkt_proto = ip->ip_p;
3614 data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
3615
3616 /* Flip endian-ness for ip_output */
3617 ip = mtod(data, struct ip *);
3618 NTOHS(ip->ip_len);
3619 NTOHS(ip->ip_off);
3620
3621 /* Increment statistics */
3622 length = mbuf_pkthdr_len(data);
3623 ifnet_stat_increment_out(interface, 1, length, 0);
3624
3625 /* Send to ip_output */
3626 memset(&ro, 0, sizeof(ro));
3627
3628 flags = (IP_OUTARGS | /* Passing out args to specify interface */
3629 IP_NOIPSEC); /* To ensure the packet doesn't go through ipsec twice */
3630
3631 memset(&ipoa, 0, sizeof(ipoa));
3632 ipoa.ipoa_flowadv.code = 0;
3633 ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
3634 if (ipsec_state.outgoing_if) {
3635 ipoa.ipoa_boundif = ipsec_state.outgoing_if;
3636 ipoa.ipoa_flags |= IPOAF_BOUND_IF;
5ba3f43e 3637 }
0a7de745
A
3638 ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);
3639
3640 adv = &ipoa.ipoa_flowadv;
3641
3642 (void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
3643 data = NULL;
3644
3645 if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
3646 error = ENOBUFS;
3647 ifnet_disable_output(interface);
3648 }
3649
3650 goto done;
3651 }
3652 case 6: {
5c9f4661 3653#if IPSEC_NEXUS
0a7de745 3654 if (!pcb->ipsec_use_netif)
5c9f4661 3655#endif // IPSEC_NEXUS
0a7de745
A
3656 {
3657 int af = AF_INET6;
3658 bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
3659 }
5c9f4661 3660
0a7de745
A
3661 data = ipsec6_splithdr(data);
3662 if (data == NULL) {
cb323159 3663 os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_splithdr returned NULL\n");
0a7de745
A
3664 goto ipsec_output_err;
3665 }
3e170ce0 3666
0a7de745
A
3667 ip6 = mtod(data, struct ip6_hdr *);
3668
3669 memset(&ipsec_state, 0, sizeof(ipsec_state));
3670 ipsec_state.m = data;
3671 ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
3672 memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
3673
3674 error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
3675 if (error == 0 && ipsec_state.tunneled == 4) { /* tunneled in IPv4 - packet is gone */
3676 goto done;
3677 }
3678 data = ipsec_state.m;
3679 if (error || data == NULL) {
3680 if (error) {
cb323159 3681 os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_output error %d\n", error);
5ba3f43e 3682 }
0a7de745 3683 goto ipsec_output_err;
5ba3f43e 3684 }
0a7de745
A
3685
3686 /* Set traffic class, set flow */
3687 m_set_service_class(data, pcb->ipsec_output_service_class);
3688 data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
3689 data->m_pkthdr.pkt_flowid = interface->if_flowhash;
3690 data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
3691 data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
3692
3693 /* Increment statistics */
3694 length = mbuf_pkthdr_len(data);
3695 ifnet_stat_increment_out(interface, 1, length, 0);
3696
3697 /* Send to ip6_output */
3698 memset(&ro6, 0, sizeof(ro6));
3699
3700 flags = IPV6_OUTARGS;
3701
3702 memset(&ip6oa, 0, sizeof(ip6oa));
3703 ip6oa.ip6oa_flowadv.code = 0;
3704 ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
3705 if (ipsec_state.outgoing_if) {
3706 ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
3707 ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
5ba3f43e 3708 }
0a7de745
A
3709 ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);
3710
3711 adv = &ip6oa.ip6oa_flowadv;
3712
3713 (void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
3714 data = NULL;
3715
3716 if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
3717 error = ENOBUFS;
3718 ifnet_disable_output(interface);
3719 }
3720
3721 goto done;
3722 }
3723 default: {
cb323159 3724 os_log_error(OS_LOG_DEFAULT, "ipsec_output: Received unknown packet version %d.\n", ip_version);
0a7de745
A
3725 error = EINVAL;
3726 goto ipsec_output_err;
3727 }
3728 }
3729
39236c6e 3730done:
0a7de745
A
3731 return error;
3732
39236c6e 3733ipsec_output_err:
0a7de745
A
3734 if (data) {
3735 mbuf_freem(data);
3736 }
39236c6e
A
3737 goto done;
3738}
3739
3740static void
0a7de745 3741ipsec_start(ifnet_t interface)
39236c6e 3742{
fe8ab488 3743 mbuf_t data;
5ba3f43e 3744 struct ipsec_pcb *pcb = ifnet_softc(interface);
fe8ab488 3745
5ba3f43e 3746 VERIFY(pcb != NULL);
fe8ab488 3747 for (;;) {
0a7de745 3748 if (ifnet_dequeue(interface, &data) != 0) {
fe8ab488 3749 break;
0a7de745
A
3750 }
3751 if (ipsec_output(interface, data) != 0) {
fe8ab488 3752 break;
0a7de745 3753 }
fe8ab488 3754 }
39236c6e
A
3755}
3756
3757/* Network Interface functions */
3758static errno_t
0a7de745
A
3759ipsec_demux(__unused ifnet_t interface,
3760 mbuf_t data,
3761 __unused char *frame_header,
3762 protocol_family_t *protocol)
39236c6e 3763{
0a7de745
A
3764 struct ip *ip;
3765 u_int ip_version;
3766
39236c6e
A
3767 while (data != NULL && mbuf_len(data) < 1) {
3768 data = mbuf_next(data);
3769 }
0a7de745
A
3770
3771 if (data == NULL) {
39236c6e 3772 return ENOENT;
39236c6e 3773 }
0a7de745
A
3774
3775 ip = mtod(data, struct ip *);
3776 ip_version = ip->ip_v;
3777
3778 switch (ip_version) {
3779 case 4:
3780 *protocol = PF_INET;
3781 return 0;
3782 case 6:
3783 *protocol = PF_INET6;
3784 return 0;
3785 default:
3786 break;
3787 }
3788
39236c6e
A
3789 return 0;
3790}
3791
3792static errno_t
0a7de745
A
3793ipsec_add_proto(__unused ifnet_t interface,
3794 protocol_family_t protocol,
3795 __unused const struct ifnet_demux_desc *demux_array,
3796 __unused u_int32_t demux_count)
39236c6e 3797{
0a7de745
A
3798 switch (protocol) {
3799 case PF_INET:
3800 return 0;
3801 case PF_INET6:
3802 return 0;
3803 default:
3804 break;
39236c6e 3805 }
0a7de745 3806
39236c6e
A
3807 return ENOPROTOOPT;
3808}
3809
3810static errno_t
0a7de745
A
3811ipsec_del_proto(__unused ifnet_t interface,
3812 __unused protocol_family_t protocol)
39236c6e
A
3813{
3814 return 0;
3815}
3816
3817static errno_t
5ba3f43e 3818ipsec_ioctl(ifnet_t interface,
0a7de745
A
3819 u_long command,
3820 void *data)
39236c6e 3821{
d9a64523
A
3822#if IPSEC_NEXUS
3823 struct ipsec_pcb *pcb = ifnet_softc(interface);
3824#endif
0a7de745
A
3825 errno_t result = 0;
3826
3827 switch (command) {
3828 case SIOCSIFMTU: {
5ba3f43e 3829#if IPSEC_NEXUS
0a7de745
A
3830 if (pcb->ipsec_use_netif) {
3831 // Make sure we can fit packets in the channel buffers
3832 if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
3833 result = EINVAL;
3834 } else {
3835 ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
5c9f4661 3836 }
0a7de745
A
3837 } else
3838#endif // IPSEC_NEXUS
3839 {
3840 ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
5c9f4661 3841 }
0a7de745 3842 break;
39236c6e 3843 }
0a7de745
A
3844
3845 case SIOCSIFFLAGS:
3846 /* ifioctl() takes care of it */
3847 break;
3848
cb323159
A
3849 case SIOCSIFSUBFAMILY: {
3850 uint32_t subfamily;
3851
3852 subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily;
3853 switch (subfamily) {
3854 case IFRTYPE_SUBFAMILY_BLUETOOTH:
3855 interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH;
3856 break;
3857 case IFRTYPE_SUBFAMILY_WIFI:
3858 interface->if_subfamily = IFNET_SUBFAMILY_WIFI;
3859 break;
3860 case IFRTYPE_SUBFAMILY_QUICKRELAY:
3861 interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY;
3862 break;
3863 case IFRTYPE_SUBFAMILY_DEFAULT:
3864 interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT;
3865 break;
3866 default:
3867 result = EINVAL;
3868 break;
3869 }
3870 break;
3871 }
3872
0a7de745
A
3873 default:
3874 result = EOPNOTSUPP;
3875 }
3876
39236c6e
A
3877 return result;
3878}
3879
3880static void
5ba3f43e 3881ipsec_detached(ifnet_t interface)
39236c6e 3882{
5ba3f43e 3883 struct ipsec_pcb *pcb = ifnet_softc(interface);
cb323159 3884
5ba3f43e 3885 (void)ifnet_release(interface);
5c9f4661 3886 ipsec_free_pcb(pcb, true);
39236c6e
A
3887}
3888
3889/* Protocol Handlers */
3890
3891static errno_t
fe8ab488 3892ipsec_proto_input(ifnet_t interface,
0a7de745
A
3893 protocol_family_t protocol,
3894 mbuf_t m,
3895 __unused char *frame_header)
39236c6e 3896{
fe8ab488 3897 mbuf_pkthdr_setrcvif(m, interface);
5c9f4661
A
3898
3899#if IPSEC_NEXUS
3900 struct ipsec_pcb *pcb = ifnet_softc(interface);
3901 if (!pcb->ipsec_use_netif)
3902#endif // IPSEC_NEXUS
3903 {
3904 uint32_t af = 0;
3905 struct ip *ip = mtod(m, struct ip *);
3906 if (ip->ip_v == 4) {
3907 af = AF_INET;
3908 } else if (ip->ip_v == 6) {
3909 af = AF_INET6;
3910 }
3911 bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
a39ff7e2 3912 pktap_input(interface, protocol, m, NULL);
5c9f4661 3913 }
39037602 3914
0a7de745 3915 int32_t pktlen = m->m_pkthdr.len;
3e170ce0
A
3916 if (proto_input(protocol, m) != 0) {
3917 ifnet_stat_increment_in(interface, 0, 0, 1);
39236c6e 3918 m_freem(m);
3e170ce0 3919 } else {
a39ff7e2 3920 ifnet_stat_increment_in(interface, 1, pktlen, 0);
3e170ce0 3921 }
0a7de745 3922
39236c6e
A
3923 return 0;
3924}
3925
3926static errno_t
0a7de745
A
3927ipsec_proto_pre_output(__unused ifnet_t interface,
3928 protocol_family_t protocol,
3929 __unused mbuf_t *packet,
3930 __unused const struct sockaddr *dest,
3931 __unused void *route,
3932 __unused char *frame_type,
3933 __unused char *link_layer_dest)
39236c6e 3934{
39236c6e
A
3935 *(protocol_family_t *)(void *)frame_type = protocol;
3936 return 0;
3937}
3938
3939static errno_t
0a7de745
A
3940ipsec_attach_proto(ifnet_t interface,
3941 protocol_family_t protocol)
39236c6e 3942{
0a7de745
A
3943 struct ifnet_attach_proto_param proto;
3944 errno_t result;
3945
39236c6e
A
3946 bzero(&proto, sizeof(proto));
3947 proto.input = ipsec_proto_input;
3948 proto.pre_output = ipsec_proto_pre_output;
0a7de745 3949
39236c6e
A
3950 result = ifnet_attach_protocol(interface, protocol, &proto);
3951 if (result != 0 && result != EEXIST) {
cb323159 3952 os_log_error(OS_LOG_DEFAULT, "ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n",
0a7de745 3953 protocol, result);
39236c6e 3954 }
0a7de745 3955
39236c6e
A
3956 return result;
3957}
fe8ab488 3958
5ba3f43e 3959errno_t
0a7de745
A
3960ipsec_inject_inbound_packet(ifnet_t interface,
3961 mbuf_t packet)
5ba3f43e 3962{
a39ff7e2 3963#if IPSEC_NEXUS
5ba3f43e
A
3964 struct ipsec_pcb *pcb = ifnet_softc(interface);
3965
5c9f4661 3966 if (pcb->ipsec_use_netif) {
cb323159
A
3967 if (!ipsec_data_move_begin(pcb)) {
3968 os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__,
3969 if_name(pcb->ipsec_ifp));
3970 return ENXIO;
3971 }
3972
5c9f4661 3973 lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
5ba3f43e 3974
5c9f4661 3975 lck_mtx_lock(&pcb->ipsec_input_chain_lock);
cb323159
A
3976
3977 if (pcb->ipsec_input_chain_count > (u_int32_t)if_ipsec_max_pending_input) {
3978 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
3979 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
3980 ipsec_data_move_end(pcb);
3981 return ENOSPC;
3982 }
3983
5c9f4661
A
3984 if (pcb->ipsec_input_chain != NULL) {
3985 pcb->ipsec_input_chain_last->m_nextpkt = packet;
3986 } else {
3987 pcb->ipsec_input_chain = packet;
3988 }
cb323159 3989 pcb->ipsec_input_chain_count++;
5c9f4661
A
3990 while (packet->m_nextpkt) {
3991 VERIFY(packet != packet->m_nextpkt);
3992 packet = packet->m_nextpkt;
cb323159 3993 pcb->ipsec_input_chain_count++;
5c9f4661
A
3994 }
3995 pcb->ipsec_input_chain_last = packet;
3996 lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
5ba3f43e 3997
cb323159 3998 kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
5c9f4661 3999 lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
5ba3f43e 4000
5c9f4661
A
4001 if (rx_ring != NULL) {
4002 kern_channel_notify(rx_ring, 0);
4003 }
5ba3f43e 4004
cb323159 4005 ipsec_data_move_end(pcb);
0a7de745 4006 return 0;
5c9f4661
A
4007 } else
4008#endif // IPSEC_NEXUS
4009 {
4010 errno_t error;
4011 protocol_family_t protocol;
4012 if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
4013 return error;
4014 }
5ba3f43e 4015
5c9f4661
A
4016 return ipsec_proto_input(interface, protocol, packet, NULL);
4017 }
fe8ab488
A
4018}
4019
4020void
4021ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family)
4022{
4023 if (packet != NULL && interface != NULL) {
4024 struct ipsec_pcb *pcb = ifnet_softc(interface);
4025 if (pcb != NULL) {
4026 /* Set traffic class, set flow */
4027 m_set_service_class(packet, pcb->ipsec_output_service_class);
4028 packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
4029 packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
4030 if (family == AF_INET) {
4031 struct ip *ip = mtod(packet, struct ip *);
4032 packet->m_pkthdr.pkt_proto = ip->ip_p;
3e170ce0 4033 } else if (family == AF_INET6) {
fe8ab488
A
4034 struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
4035 packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
4036 }
4037 packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
4038 }
4039 }
4040}
39037602
A
4041
4042void
4043ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
4044{
4045 struct ipsec_pcb *pcb;
0a7de745
A
4046
4047 if (interface == NULL || ipoa == NULL) {
39037602 4048 return;
0a7de745 4049 }
39037602 4050 pcb = ifnet_softc(interface);
0a7de745 4051
39037602
A
4052 if (net_qos_policy_restricted == 0) {
4053 ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
4054 ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4055 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
0a7de745 4056 net_qos_policy_restrict_avapps != 0) {
39037602
A
4057 ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4058 } else {
4059 ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4060 ipoa->ipoa_sotc = SO_TC_VO;
4061 }
4062}
4063
4064void
4065ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
4066{
4067 struct ipsec_pcb *pcb;
0a7de745
A
4068
4069 if (interface == NULL || ip6oa == NULL) {
39037602 4070 return;
0a7de745 4071 }
39037602 4072 pcb = ifnet_softc(interface);
0a7de745 4073
39037602
A
4074 if (net_qos_policy_restricted == 0) {
4075 ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED;
4076 ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
4077 } else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
0a7de745 4078 net_qos_policy_restrict_avapps != 0) {
39037602
A
4079 ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
4080 } else {
4081 ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
4082 ip6oa->ip6oa_sotc = SO_TC_VO;
4083 }
4084}
cb323159 4085
cb323159
A
4086
4087static void
4088ipsec_data_move_drain(struct ipsec_pcb *pcb)
4089{
4090 lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
4091 /* data path must already be marked as not ready */
4092 VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
4093 pcb->ipsec_pcb_drainers++;
4094 while (pcb->ipsec_pcb_data_move != 0) {
4095 (void)msleep(&(pcb->ipsec_pcb_data_move), &pcb->ipsec_pcb_data_move_lock,
4096 (PZERO - 1), __func__, NULL);
4097 }
4098 VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
4099 VERIFY(pcb->ipsec_pcb_drainers > 0);
4100 pcb->ipsec_pcb_drainers--;
4101 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4102}
4103
4104static void
4105ipsec_wait_data_move_drain(struct ipsec_pcb *pcb)
4106{
4107 /*
4108 * Mark the data path as not usable.
4109 */
4110 lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
4111 IPSEC_CLR_DATA_PATH_READY(pcb);
4112 lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
4113
4114 /* Wait until all threads in the data paths are done. */
4115 ipsec_data_move_drain(pcb);
4116}