+/* Protocol handlers */
+static errno_t utun_attach_proto(ifnet_t interface, protocol_family_t proto);
+static errno_t utun_proto_input(ifnet_t interface, protocol_family_t protocol,
+ mbuf_t m, char *frame_header);
+static errno_t utun_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
+ mbuf_t *packet, const struct sockaddr *dest, void *route,
+ char *frame_type, char *link_layer_dest);
+static errno_t utun_pkt_input(struct utun_pcb *pcb, mbuf_t m);
+
+#if UTUN_NEXUS
+
+#define UTUN_IF_DEFAULT_SLOT_SIZE 4096
+#define UTUN_IF_DEFAULT_RING_SIZE 64
+#define UTUN_IF_DEFAULT_TX_FSW_RING_SIZE 64
+#define UTUN_IF_DEFAULT_RX_FSW_RING_SIZE 128
+#define UTUN_IF_HEADROOM_SIZE 32
+
+#define UTUN_IF_MIN_RING_SIZE 16
+#define UTUN_IF_MAX_RING_SIZE 1024
+
+static int sysctl_if_utun_ring_size SYSCTL_HANDLER_ARGS;
+static int sysctl_if_utun_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
+static int sysctl_if_utun_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
+
+static int if_utun_ring_size = UTUN_IF_DEFAULT_RING_SIZE;
+static int if_utun_tx_fsw_ring_size = UTUN_IF_DEFAULT_TX_FSW_RING_SIZE;
+static int if_utun_rx_fsw_ring_size = UTUN_IF_DEFAULT_RX_FSW_RING_SIZE;
+
+SYSCTL_DECL(_net_utun);
+SYSCTL_NODE(_net, OID_AUTO, utun, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "UTun");
+
+SYSCTL_PROC(_net_utun, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
+ &if_utun_ring_size, UTUN_IF_DEFAULT_RING_SIZE, &sysctl_if_utun_ring_size, "I", "");
+SYSCTL_PROC(_net_utun, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
+ &if_utun_tx_fsw_ring_size, UTUN_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_utun_tx_fsw_ring_size, "I", "");
+SYSCTL_PROC(_net_utun, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
+ &if_utun_rx_fsw_ring_size, UTUN_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_utun_rx_fsw_ring_size, "I", "");
+
+static errno_t
+utun_register_nexus(void);
+
+static errno_t
+utun_netif_prepare(__unused kern_nexus_t nexus, ifnet_t ifp);
+static errno_t
+utun_nexus_pre_connect(kern_nexus_provider_t nxprov,
+ proc_t p, kern_nexus_t nexus,
+ nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx);
+static errno_t
+utun_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
+ kern_channel_t channel);
+static void
+utun_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
+ kern_channel_t channel);
+static void
+utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
+ kern_channel_t channel);
+static void
+utun_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
+ kern_channel_t channel);
+static errno_t
+utun_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
+ kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
+ void **ring_ctx);
+static void
+utun_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
+ kern_channel_ring_t ring);
+static errno_t
+utun_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
+ kern_channel_ring_t ring, uint32_t flags);
+static errno_t
+utun_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
+ kern_channel_ring_t ring, uint32_t flags);
+#endif // UTUN_NEXUS
+
+#define UTUN_DEFAULT_MTU 1500
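+/*
+ * Each packet passed over the control socket or channel carries a utun
+ * header: a 4-byte protocol family, optionally followed by the UUID of
+ * the originating process when UTUN_FLAGS_ENABLE_PROC_UUID is set.
+ */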
+#define UTUN_HEADER_SIZE(_pcb) (sizeof(u_int32_t) + (((_pcb)->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) ? sizeof(uuid_t) : 0))
+
+static kern_ctl_ref utun_kctlref;
+static u_int32_t utun_family;
+static lck_attr_t *utun_lck_attr;
+static lck_grp_attr_t *utun_lck_grp_attr;
+static lck_grp_t *utun_lck_grp;
+static lck_mtx_t utun_lock;
+
+TAILQ_HEAD(utun_list, utun_pcb) utun_head;
+
+#define UTUN_PCB_ZONE_MAX 32
+#define UTUN_PCB_ZONE_NAME "net.if_utun"
+
+static unsigned int utun_pcb_size; /* size of zone element */
+static struct zone *utun_pcb_zone; /* zone for utun_pcb */
+
+#if UTUN_NEXUS
+
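+/*
+ * Sysctl handlers for the ring size knobs. Each handler accepts a new
+ * value only if it falls within [UTUN_IF_MIN_RING_SIZE, UTUN_IF_MAX_RING_SIZE].
+ */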
+static int
+sysctl_if_utun_ring_size SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+ int value = if_utun_ring_size;
+
+ int error = sysctl_handle_int(oidp, &value, 0, req);
+ if (error || !req->newptr) {
+ return (error);
+ }
+
+ if (value < UTUN_IF_MIN_RING_SIZE ||
+ value > UTUN_IF_MAX_RING_SIZE) {
+ return (EINVAL);
+ }
+
+ if_utun_ring_size = value;
+
+ return (0);
+}
+
+static int
+sysctl_if_utun_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+ int value = if_utun_tx_fsw_ring_size;
+
+ int error = sysctl_handle_int(oidp, &value, 0, req);
+ if (error || !req->newptr) {
+ return (error);
+ }
+
+ if (value < UTUN_IF_MIN_RING_SIZE ||
+ value > UTUN_IF_MAX_RING_SIZE) {
+ return (EINVAL);
+ }
+
+ if_utun_tx_fsw_ring_size = value;
+
+ return (0);
+}
+
+static int
+sysctl_if_utun_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+ int value = if_utun_rx_fsw_ring_size;
+
+ int error = sysctl_handle_int(oidp, &value, 0, req);
+ if (error || !req->newptr) {
+ return (error);
+ }
+
+ if (value < UTUN_IF_MIN_RING_SIZE ||
+ value > UTUN_IF_MAX_RING_SIZE) {
+ return (EINVAL);
+ }
+
+ if_utun_rx_fsw_ring_size = value;
+
+ return (0);
+}
+
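+/*
+ * Netif ring init/fini: cache the RX and TX ring pointers in the pcb so the
+ * sync and doorbell callbacks can find them, and clear them on teardown.
+ */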
+static errno_t
+utun_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
+ kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
+ void **ring_ctx)
+{
+#pragma unused(nxprov)
+#pragma unused(channel)
+#pragma unused(ring_ctx)
+ struct utun_pcb *pcb = kern_nexus_get_context(nexus);
+ if (!is_tx_ring) {
+ VERIFY(pcb->utun_netif_rxring == NULL);
+ pcb->utun_netif_rxring = ring;
+ } else {
+ VERIFY(pcb->utun_netif_txring == NULL);
+ pcb->utun_netif_txring = ring;
+ }
+ return 0;
+}
+
+static void
+utun_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
+ kern_channel_ring_t ring)
+{
+#pragma unused(nxprov)
+ struct utun_pcb *pcb = kern_nexus_get_context(nexus);
+ if (pcb->utun_netif_rxring == ring) {
+ pcb->utun_netif_rxring = NULL;
+ } else if (pcb->utun_netif_txring == ring) {
+ pcb->utun_netif_txring = NULL;
+ }
+}
+
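+/*
+ * Netif TX sync. If the kernel pipe is enabled, the packets stay on the TX
+ * ring and the kpipe RX ring is notified so the channel reader can drain
+ * them. Otherwise each packet is detached, prefixed with the utun header,
+ * copied into an mbuf, and sent through utun_output().
+ */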
+static errno_t
+utun_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
+ kern_channel_ring_t tx_ring, uint32_t flags)
+{
+#pragma unused(nxprov)
+#pragma unused(flags)
+ struct utun_pcb *pcb = kern_nexus_get_context(nexus);
+
+ struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
+
+ lck_rw_lock_shared(&pcb->utun_pcb_lock);
+
+ struct kern_channel_ring_stat_increment tx_ring_stats;
+ bzero(&tx_ring_stats, sizeof(tx_ring_stats));
+ kern_channel_slot_t tx_pslot = NULL;
+ kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
+
+ STATS_INC(nifs, NETIF_STATS_TXSYNC);
+
+ if (tx_slot == NULL) {
+ // Nothing to write, don't bother signalling
+ lck_rw_unlock_shared(&pcb->utun_pcb_lock);
+ return 0;
+ }
+
+ if (pcb->utun_kpipe_enabled) {
+ kern_channel_ring_t rx_ring = pcb->utun_kpipe_rxring;
+ lck_rw_unlock_shared(&pcb->utun_pcb_lock);
+
+ // Signal the kernel pipe ring to read
+ if (rx_ring != NULL) {
+ kern_channel_notify(rx_ring, 0);
+ }
+ return 0;
+ }
+
+ // If we're here, we're injecting into the utun kernel control socket
+ while (tx_slot != NULL) {
+ size_t length = 0;
+ mbuf_t data = NULL;
+
+ kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
+
+ if (tx_ph == 0) {
+ // Advance TX ring
+ tx_pslot = tx_slot;
+ tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
+ continue;
+ }
+ (void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
+
+ // Advance TX ring
+ tx_pslot = tx_slot;
+ tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
+
+ kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
+ VERIFY(tx_buf != NULL);
+
+ /* tx_baddr is the absolute buffer address */
+ uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
+ VERIFY(tx_baddr != 0);
+
+ bpf_tap_packet_out(pcb->utun_ifp, DLT_RAW, tx_ph, NULL, 0);
+
+ uint16_t tx_offset = kern_buflet_get_data_offset(tx_buf);
+ uint32_t tx_length = kern_buflet_get_data_length(tx_buf);
+
+ // The offset must be large enough for the headers
+ VERIFY(tx_offset >= UTUN_HEADER_SIZE(pcb));
+
+ // Find family
+ uint32_t af = 0;
+ uint8_t vhl = *(uint8_t *)(tx_baddr + tx_offset);
+ u_int ip_version = (vhl >> 4);
+ switch (ip_version) {
+ case 4: {
+ af = AF_INET;
+ break;
+ }
+ case 6: {
+ af = AF_INET6;
+ break;
+ }
+ default: {
+ printf("utun_netif_sync_tx %s: unknown ip version %u vhl %u tx_offset %u len %u header_size %zu\n",
+ pcb->utun_ifp->if_xname, ip_version, vhl, tx_offset, tx_length,
+ UTUN_HEADER_SIZE(pcb));
+ break;
+ }
+ }
+
+ tx_offset -= UTUN_HEADER_SIZE(pcb);
+ tx_length += UTUN_HEADER_SIZE(pcb);
+ tx_baddr += tx_offset;
+
+ length = MIN(tx_length, UTUN_IF_DEFAULT_SLOT_SIZE);
+
+ // Copy in family
+ memcpy(tx_baddr, &af, sizeof(af));
+ if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
+ kern_packet_get_euuid(tx_ph, (void *)(tx_baddr + sizeof(af)));
+ }
+
+ if (length > 0) {
+ errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
+ if (error == 0) {
+ error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
+ if (error == 0) {
+ error = utun_output(pcb->utun_ifp, data);
+ if (error != 0) {
+ printf("utun_netif_sync_tx %s - utun_output error %d\n", pcb->utun_ifp->if_xname, error);
+ }
+ } else {
+ printf("utun_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->utun_ifp->if_xname, length, error);
+ STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
+ STATS_INC(nifs, NETIF_STATS_DROPPED);
+ mbuf_freem(data);
+ data = NULL;
+ }
+ } else {
+ printf("utun_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->utun_ifp->if_xname, error);
+ STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
+ STATS_INC(nifs, NETIF_STATS_DROPPED);
+ }
+ } else {
+ printf("utun_netif_sync_tx %s - 0 length packet\n", pcb->utun_ifp->if_xname);
+ STATS_INC(nifs, NETIF_STATS_NOMEM_MBUF);
+ STATS_INC(nifs, NETIF_STATS_DROPPED);
+ }
+
+ kern_pbufpool_free(tx_ring->ckr_pp, tx_ph);
+
+ if (data == NULL) {
+ continue;
+ }
+
+ STATS_INC(nifs, NETIF_STATS_TXPKTS);
+ STATS_INC(nifs, NETIF_STATS_TXCOPY_MBUF);
+
+ tx_ring_stats.kcrsi_slots_transferred++;
+ tx_ring_stats.kcrsi_bytes_transferred += length;
+ }
+
+ if (tx_pslot) {
+ kern_channel_advance_slot(tx_ring, tx_pslot);
+ kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
+ (void)kern_channel_reclaim(tx_ring);
+ }
+
+ lck_rw_unlock_shared(&pcb->utun_pcb_lock);
+
+ return 0;
+}
+
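+/*
+ * Netif TX doorbell. Refills the TX ring and, when the kernel pipe is
+ * enabled, disables ifnet output once the ring has no room left and
+ * notifies the kpipe RX ring so a reader can drain it.
+ */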
+static errno_t
+utun_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
+ kern_channel_ring_t ring, __unused uint32_t flags)
+{
+#pragma unused(nxprov)
+ struct utun_pcb *pcb = kern_nexus_get_context(nexus);
+
+ lck_rw_lock_shared(&pcb->utun_pcb_lock);
+
+ boolean_t more = false;
+ errno_t rc = 0;
+ do {
+ /* Refill and sync the ring */
+ rc = kern_channel_tx_refill(ring, UINT32_MAX, UINT32_MAX, true, &more);
+ if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
+ printf("%s, tx refill failed %d\n", __func__, rc);
+ }
+ } while ((rc == 0) && more);
+
+ if (pcb->utun_kpipe_enabled && !pcb->utun_output_disabled) {
+ uint32_t tx_available = kern_channel_available_slot_count(ring);
+ if (pcb->utun_netif_txring_size > 0 &&
+ tx_available >= pcb->utun_netif_txring_size - 1) {
+ // No room left in tx ring, disable output for now
+ errno_t error = ifnet_disable_output(pcb->utun_ifp);
+ if (error != 0) {
+ printf("utun_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
+ } else {
+ pcb->utun_output_disabled = true;
+ }
+ }
+ }
+
+ if (pcb->utun_kpipe_enabled &&
+ (((rc != 0) && (rc != EAGAIN)) || pcb->utun_output_disabled)) {
+ kern_channel_ring_t rx_ring = pcb->utun_kpipe_rxring;
+
+ // Unlock while calling notify
+ lck_rw_unlock_shared(&pcb->utun_pcb_lock);
+ // Signal the kernel pipe ring to read
+ if (rx_ring != NULL) {
+ kern_channel_notify(rx_ring, 0);
+ }
+ } else {
+ lck_rw_unlock_shared(&pcb->utun_pcb_lock);
+ }
+
+ return (0);
+}
+
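+/*
+ * Netif RX sync. Fills the netif RX ring from two sources: mbufs queued on
+ * the legacy input chain, and, when a kernel pipe TX ring is attached,
+ * packets copied directly from that ring.
+ */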
+static errno_t
+utun_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
+ kern_channel_ring_t rx_ring, uint32_t flags)
+{
+#pragma unused(nxprov)
+#pragma unused(flags)
+ struct utun_pcb *pcb = kern_nexus_get_context(nexus);
+ struct kern_channel_ring_stat_increment rx_ring_stats;
+
+ struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
+
+ lck_rw_lock_shared(&pcb->utun_pcb_lock);
+
+ // Reclaim user-released slots
+ (void) kern_channel_reclaim(rx_ring);
+
+ STATS_INC(nifs, NETIF_STATS_RXSYNC);
+
+ uint32_t avail = kern_channel_available_slot_count(rx_ring);
+ if (avail == 0) {
+ lck_rw_unlock_shared(&pcb->utun_pcb_lock);
+ return 0;
+ }
+
+ struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
+ VERIFY(rx_pp != NULL);
+ bzero(&rx_ring_stats, sizeof(rx_ring_stats));
+ kern_channel_slot_t rx_pslot = NULL;
+ kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
+
+ while (rx_slot != NULL) {
+ // Check for a waiting packet
+ lck_mtx_lock(&pcb->utun_input_chain_lock);
+ mbuf_t data = pcb->utun_input_chain;
+ if (data == NULL) {
+ lck_mtx_unlock(&pcb->utun_input_chain_lock);
+ break;
+ }
+
+ // Allocate rx packet
+ kern_packet_t rx_ph = 0;
+ errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
+ if (unlikely(error != 0)) {
+ STATS_INC(nifs, NETIF_STATS_NOMEM_PKT);
+ STATS_INC(nifs, NETIF_STATS_DROPPED);
+ printf("utun_netif_sync_rx %s: failed to allocate packet\n",
+ pcb->utun_ifp->if_xname);
+ lck_mtx_unlock(&pcb->utun_input_chain_lock);
+ break;
+ }
+
+ // Advance the waiting packet chain
+ pcb->utun_input_chain = data->m_nextpkt;
+ data->m_nextpkt = NULL;
+ if (pcb->utun_input_chain == NULL) {
+ pcb->utun_input_chain_last = NULL;
+ }
+ lck_mtx_unlock(&pcb->utun_input_chain_lock);
+
+ size_t header_offset = UTUN_HEADER_SIZE(pcb);
+ size_t length = mbuf_pkthdr_len(data);
+
+ if (length < header_offset) {
+ // mbuf is too small
+ mbuf_freem(data);
+ kern_pbufpool_free(rx_pp, rx_ph);
+ STATS_INC(nifs, NETIF_STATS_BADLEN);
+ STATS_INC(nifs, NETIF_STATS_DROPPED);
+ printf("utun_netif_sync_rx %s: legacy packet length too short for header %zu < %zu\n",
+ pcb->utun_ifp->if_xname, length, header_offset);
+ continue;
+ }
+
+ length -= header_offset;
+ if (length > rx_pp->pp_buflet_size) {
+ // Packet is too large for a single buflet, drop it
+ mbuf_freem(data);
+ kern_pbufpool_free(rx_pp, rx_ph);
+ STATS_INC(nifs, NETIF_STATS_BADLEN);
+ STATS_INC(nifs, NETIF_STATS_DROPPED);
+ printf("utun_netif_sync_rx %s: legacy packet length %zu > %u\n",
+ pcb->utun_ifp->if_xname, length, rx_pp->pp_buflet_size);
+ continue;
+ }
+
+ mbuf_pkthdr_setrcvif(data, pcb->utun_ifp);
+
+ // Fill out rx packet
+ kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
+ VERIFY(rx_buf != NULL);
+ void *rx_baddr = kern_buflet_get_object_address(rx_buf);
+ VERIFY(rx_baddr != NULL);
+
+ // Copy-in data from mbuf to buflet
+ mbuf_copydata(data, header_offset, length, (void *)rx_baddr);
+ kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
+
+ // Finalize and attach the packet
+ error = kern_buflet_set_data_offset(rx_buf, 0);
+ VERIFY(error == 0);
+ error = kern_buflet_set_data_length(rx_buf, length);
+ VERIFY(error == 0);
+ error = kern_packet_set_link_header_offset(rx_ph, 0);
+ VERIFY(error == 0);
+ error = kern_packet_set_network_header_offset(rx_ph, 0);
+ VERIFY(error == 0);
+ error = kern_packet_finalize(rx_ph);
+ VERIFY(error == 0);
+ error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
+ VERIFY(error == 0);
+
+ STATS_INC(nifs, NETIF_STATS_RXPKTS);
+ STATS_INC(nifs, NETIF_STATS_RXCOPY_MBUF);
+ bpf_tap_packet_in(pcb->utun_ifp, DLT_RAW, rx_ph, NULL, 0);
+
+ rx_ring_stats.kcrsi_slots_transferred++;
+ rx_ring_stats.kcrsi_bytes_transferred += length;
+
+ mbuf_freem(data);
+
+ // Advance ring
+ rx_pslot = rx_slot;
+ rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
+ }
+
+ struct kern_channel_ring_stat_increment tx_ring_stats;
+ bzero(&tx_ring_stats, sizeof(tx_ring_stats));
+ kern_channel_ring_t tx_ring = pcb->utun_kpipe_txring;
+ kern_channel_slot_t tx_pslot = NULL;
+ kern_channel_slot_t tx_slot = NULL;
+ if (tx_ring == NULL) {
+ // Kernel pipe TX ring not set up yet, nothing to read
+ goto done;
+ }
+
+ // Unlock utun before entering ring
+ lck_rw_unlock_shared(&pcb->utun_pcb_lock);
+
+ (void)kr_enter(tx_ring, TRUE);
+
+ // Lock again after entering and validate
+ lck_rw_lock_shared(&pcb->utun_pcb_lock);
+ if (tx_ring != pcb->utun_kpipe_txring) {
+ goto done;
+ }
+
+ tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
+ if (tx_slot == NULL) {
+ // Nothing to read, don't bother signalling
+ goto done;
+ }
+
+ while (rx_slot != NULL && tx_slot != NULL) {
+ // Allocate rx packet
+ kern_packet_t rx_ph = 0;
+ kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
+
+ // Advance TX ring
+ tx_pslot = tx_slot;
+ tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
+
+ /* Skip slot if packet is zero-length or marked as dropped (QUMF_DROPPED) */
+ if (tx_ph == 0) {
+ continue;
+ }
+
+ errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
+ if (unlikely(error != 0)) {
+ STATS_INC(nifs, NETIF_STATS_NOMEM_PKT);
+ STATS_INC(nifs, NETIF_STATS_DROPPED);
+ printf("utun_netif_sync_rx %s: failed to allocate packet\n",
+ pcb->utun_ifp->if_xname);
+ break;
+ }
+
+ kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
+ VERIFY(tx_buf != NULL);
+ uint8_t *tx_baddr = kern_buflet_get_object_address(tx_buf);
+ VERIFY(tx_baddr != 0);
+ tx_baddr += kern_buflet_get_data_offset(tx_buf);
+
+ // Check packet length
+ size_t header_offset = UTUN_HEADER_SIZE(pcb);
+ uint32_t tx_length = kern_packet_get_data_length(tx_ph);
+ if (tx_length < header_offset) {
+ // Packet is too small
+ kern_pbufpool_free(rx_pp, rx_ph);
+ STATS_INC(nifs, NETIF_STATS_BADLEN);
+ STATS_INC(nifs, NETIF_STATS_DROPPED);
+ printf("utun_netif_sync_rx %s: packet length too short for header %u < %zu\n",
+ pcb->utun_ifp->if_xname, tx_length, header_offset);
+ continue;
+ }
+
+ size_t length = MIN(tx_length - header_offset,
+ UTUN_IF_DEFAULT_SLOT_SIZE);
+
+ tx_ring_stats.kcrsi_slots_transferred++;
+ tx_ring_stats.kcrsi_bytes_transferred += length;
+
+ // Fill out rx packet
+ kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
+ VERIFY(rx_buf != NULL);
+ void *rx_baddr = kern_buflet_get_object_address(rx_buf);
+ VERIFY(rx_baddr != NULL);
+
+ // Copy-in data from tx to rx
+ memcpy((void *)rx_baddr, (void *)(tx_baddr + header_offset), length);
+ kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
+
+ // Finalize and attach the packet
+ error = kern_buflet_set_data_offset(rx_buf, 0);
+ VERIFY(error == 0);
+ error = kern_buflet_set_data_length(rx_buf, length);
+ VERIFY(error == 0);
+ error = kern_packet_set_link_header_offset(rx_ph, 0);
+ VERIFY(error == 0);
+ error = kern_packet_set_network_header_offset(rx_ph, 0);
+ VERIFY(error == 0);
+ error = kern_packet_finalize(rx_ph);
+ VERIFY(error == 0);
+ error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
+ VERIFY(error == 0);
+
+ STATS_INC(nifs, NETIF_STATS_RXPKTS);
+ STATS_INC(nifs, NETIF_STATS_RXCOPY_DIRECT);
+ bpf_tap_packet_in(pcb->utun_ifp, DLT_RAW, rx_ph, NULL, 0);
+
+ rx_ring_stats.kcrsi_slots_transferred++;
+ rx_ring_stats.kcrsi_bytes_transferred += length;
+
+ rx_pslot = rx_slot;
+ rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
+ }
+
+done:
+ if (rx_pslot) {
+ kern_channel_advance_slot(rx_ring, rx_pslot);
+ kern_channel_increment_ring_net_stats(rx_ring, pcb->utun_ifp, &rx_ring_stats);
+ }
+
+ if (tx_pslot) {
+ kern_channel_advance_slot(tx_ring, tx_pslot);
+ kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
+ (void)kern_channel_reclaim(tx_ring);
+ }
+
+ // Unlock first, then exit ring
+ lck_rw_unlock_shared(&pcb->utun_pcb_lock);
+ if (tx_ring != NULL) {
+ if (tx_pslot != NULL) {
+ kern_channel_notify(tx_ring, 0);
+ }
+ kr_exit(tx_ring);
+ }
+
+ return 0;
+}
+
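+/*
+ * Register a netif nexus provider for this utun instance and allocate the
+ * provider instance, which creates the ifnet.
+ */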
+static errno_t
+utun_nexus_ifattach(struct utun_pcb *pcb,
+ struct ifnet_init_eparams *init_params,
+ struct ifnet **ifp)
+{
+ errno_t err;
+ nexus_controller_t controller = kern_nexus_shared_controller();
+ struct kern_nexus_net_init net_init;
+
+ nexus_name_t provider_name;
+ snprintf((char *)provider_name, sizeof(provider_name),
+ "com.apple.netif.utun%d", pcb->utun_unit);
+
+ struct kern_nexus_provider_init prov_init = {
+ .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
+ .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
+ .nxpi_pre_connect = utun_nexus_pre_connect,
+ .nxpi_connected = utun_nexus_connected,
+ .nxpi_pre_disconnect = utun_netif_pre_disconnect,
+ .nxpi_disconnected = utun_nexus_disconnected,
+ .nxpi_ring_init = utun_netif_ring_init,
+ .nxpi_ring_fini = utun_netif_ring_fini,
+ .nxpi_slot_init = NULL,
+ .nxpi_slot_fini = NULL,
+ .nxpi_sync_tx = utun_netif_sync_tx,
+ .nxpi_sync_rx = utun_netif_sync_rx,
+ .nxpi_tx_doorbell = utun_netif_tx_doorbell,
+ };
+
+ nexus_attr_t nxa = NULL;
+ err = kern_nexus_attr_create(&nxa);
+ if (err != 0) {
+ printf("%s: kern_nexus_attr_create failed: %d\n",
+ __func__, err);
+ goto failed;
+ }
+
+ uint64_t slot_buffer_size = UTUN_IF_DEFAULT_SLOT_SIZE;
+ err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
+ VERIFY(err == 0);
+
+ // Reset ring size for netif nexus to limit memory usage
+ uint64_t ring_size = if_utun_ring_size;
+ err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
+ VERIFY(err == 0);
+ err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
+ VERIFY(err == 0);
+
+ pcb->utun_netif_txring_size = ring_size;
+
+ err = kern_nexus_controller_register_provider(controller,
+ utun_nx_dom_prov,
+ provider_name,
+ &prov_init,
+ sizeof(prov_init),
+ nxa,
+ &pcb->utun_nx.if_provider);
+ if (err != 0) {
+ printf("%s register provider failed, error %d\n",
+ __func__, err);
+ goto failed;
+ }
+
+ bzero(&net_init, sizeof(net_init));
+ net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
+ net_init.nxneti_flags = 0;
+ net_init.nxneti_eparams = init_params;
+ net_init.nxneti_lladdr = NULL;
+ net_init.nxneti_prepare = utun_netif_prepare;
+ err = kern_nexus_controller_alloc_net_provider_instance(controller,
+ pcb->utun_nx.if_provider,
+ pcb,
+ &pcb->utun_nx.if_instance,
+ &net_init,
+ ifp);
+ if (err != 0) {
+ printf("%s alloc_net_provider_instance failed, %d\n",
+ __func__, err);
+ kern_nexus_controller_deregister_provider(controller,
+ pcb->utun_nx.if_provider);
+ uuid_clear(pcb->utun_nx.if_provider);
+ goto failed;
+ }
+
+failed:
+ if (nxa) {
+ kern_nexus_attr_destroy(nxa);
+ }
+ return (err);
+}
+
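+/*
+ * Free a nexus provider instance and deregister its provider, clearing both
+ * UUIDs once released.
+ */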
+static void
+utun_detach_provider_and_instance(uuid_t provider, uuid_t instance)
+{
+ nexus_controller_t controller = kern_nexus_shared_controller();
+ errno_t err;
+
+ if (!uuid_is_null(instance)) {
+ err = kern_nexus_controller_free_provider_instance(controller,
+ instance);
+ if (err != 0) {
+ printf("%s free_provider_instance failed %d\n",
+ __func__, err);
+ }
+ uuid_clear(instance);
+ }
+ if (!uuid_is_null(provider)) {
+ err = kern_nexus_controller_deregister_provider(controller,
+ provider);
+ if (err != 0) {
+ printf("%s deregister_provider %d\n", __func__, err);
+ }
+ uuid_clear(provider);
+ }
+ return;
+}
+
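+/*
+ * Tear down the nexus state for a utun instance: detach the multistack host
+ * and device ports, then release the netif and flowswitch providers and
+ * instances.
+ */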
+static void
+utun_nexus_detach(utun_nx_t nx)
+{
+ nexus_controller_t controller = kern_nexus_shared_controller();
+ errno_t err;
+
+ if (!uuid_is_null(nx->ms_host)) {
+ err = kern_nexus_ifdetach(controller,
+ nx->ms_instance,
+ nx->ms_host);
+ if (err != 0) {
+ printf("%s: kern_nexus_ifdetach ms host failed %d\n",
+ __func__, err);
+ }
+ }
+
+ if (!uuid_is_null(nx->ms_device)) {
+ err = kern_nexus_ifdetach(controller,
+ nx->ms_instance,
+ nx->ms_device);
+ if (err != 0) {
+ printf("%s: kern_nexus_ifdetach ms device failed %d\n",
+ __func__, err);
+ }
+ }
+
+ utun_detach_provider_and_instance(nx->if_provider,
+ nx->if_instance);
+ utun_detach_provider_and_instance(nx->ms_provider,
+ nx->ms_instance);
+
+ memset(nx, 0, sizeof(*nx));
+}
+
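+/*
+ * Register a flowswitch nexus provider named "com.apple.<type_name>.<ifname>"
+ * with the configured flowswitch TX/RX ring sizes, and allocate an instance
+ * of it.
+ */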
+static errno_t
+utun_create_fs_provider_and_instance(uint32_t subtype, const char *type_name,
+ const char *ifname,
+ uuid_t *provider, uuid_t *instance)
+{
+ nexus_attr_t attr = NULL;
+ nexus_controller_t controller = kern_nexus_shared_controller();
+ uuid_t dom_prov;
+ errno_t err;
+ struct kern_nexus_init init;
+ nexus_name_t provider_name;
+
+ err = kern_nexus_get_builtin_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
+ &dom_prov);
+ if (err != 0) {
+ printf("%s can't get %s provider, error %d\n",
+ __func__, type_name, err);
+ goto failed;
+ }
+
+ err = kern_nexus_attr_create(&attr);
+ if (err != 0) {
+ printf("%s: kern_nexus_attr_create failed: %d\n",
+ __func__, err);
+ goto failed;
+ }
+
+ err = kern_nexus_attr_set(attr, NEXUS_ATTR_EXTENSIONS, subtype);
+ VERIFY(err == 0);
+
+ uint64_t slot_buffer_size = UTUN_IF_DEFAULT_SLOT_SIZE;
+ err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
+ VERIFY(err == 0);
+
+ // Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
+ uint64_t tx_ring_size = if_utun_tx_fsw_ring_size;
+ err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
+ VERIFY(err == 0);
+ uint64_t rx_ring_size = if_utun_rx_fsw_ring_size;
+ err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
+ VERIFY(err == 0);
+
+ snprintf((char *)provider_name, sizeof(provider_name),
+ "com.apple.%s.%s", type_name, ifname);
+ err = kern_nexus_controller_register_provider(controller,
+ dom_prov,
+ provider_name,
+ NULL,
+ 0,
+ attr,
+ provider);
+ kern_nexus_attr_destroy(attr);
+ attr = NULL;
+ if (err != 0) {
+ printf("%s register %s provider failed, error %d\n",
+ __func__, type_name, err);
+ goto failed;
+ }
+ bzero(&init, sizeof (init));
+ init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
+ err = kern_nexus_controller_alloc_provider_instance(controller,
+ *provider,
+ NULL,
+ instance, &init);
+ if (err != 0) {
+ printf("%s alloc_provider_instance %s failed, %d\n",
+ __func__, type_name, err);
+ kern_nexus_controller_deregister_provider(controller,
+ *provider);
+ uuid_clear(*provider);
+ }
+failed:
+ return (err);
+}
+
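+/*
+ * Attach the utun ifnet to a multistack flowswitch: create the flowswitch
+ * provider and instance, attach it to the netif device and host ports, and
+ * record the flowswitch agent UUID. On failure the nexus state is torn down
+ * and the ifnet is detached.
+ */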
+static errno_t
+utun_multistack_attach(struct utun_pcb *pcb)
+{
+ nexus_controller_t controller = kern_nexus_shared_controller();
+ errno_t err = 0;
+ utun_nx_t nx = &pcb->utun_nx;
+
+ // Allocate multistack flowswitch
+ err = utun_create_fs_provider_and_instance(NEXUS_EXTENSION_FSW_TYPE_MULTISTACK,
+ "multistack",
+ pcb->utun_ifp->if_xname,
+ &nx->ms_provider,
+ &nx->ms_instance);
+ if (err != 0) {
+ printf("%s: failed to create bridge provider and instance\n",
+ __func__);
+ goto failed;
+ }
+
+ // Attach multistack to device port
+ err = kern_nexus_ifattach(controller, nx->ms_instance,
+ NULL, nx->if_instance,
+ FALSE, &nx->ms_device);
+ if (err != 0) {
+ printf("%s kern_nexus_ifattach ms device %d\n", __func__, err);
+ goto failed;
+ }
+
+ // Attach multistack to host port
+ err = kern_nexus_ifattach(controller, nx->ms_instance,
+ NULL, nx->if_instance,
+ TRUE, &nx->ms_host);
+ if (err != 0) {
+ printf("%s kern_nexus_ifattach ms host %d\n", __func__, err);
+ goto failed;
+ }
+
+ // Extract the agent UUID and save for later
+ struct kern_nexus *multistack_nx = nx_find(nx->ms_instance, false);
+ if (multistack_nx != NULL) {
+ struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(multistack_nx);
+ if (flowswitch != NULL) {
+ FSW_RLOCK(flowswitch);
+ struct fsw_ms_context *ms_context = (struct fsw_ms_context *)flowswitch->fsw_ops_private;
+ if (ms_context != NULL) {
+ uuid_copy(nx->ms_agent, ms_context->mc_agent_uuid);
+ } else {
+ printf("utun_multistack_attach - fsw_ms_context is NULL\n");
+ }
+ FSW_UNLOCK(flowswitch);
+ } else {
+ printf("utun_multistack_attach - flowswitch is NULL\n");
+ }
+ nx_release(multistack_nx);
+ } else {
+ printf("utun_multistack_attach - unable to find multistack nexus\n");
+ }
+
+ return (0);
+
+failed:
+ utun_nexus_detach(nx);
+
+ errno_t detach_error = 0;
+ if ((detach_error = ifnet_detach(pcb->utun_ifp)) != 0) {
+ panic("utun_multistack_attach - ifnet_detach failed: %d\n", detach_error);
+ /* NOT REACHED */
+ }
+
+ return (err);
+}
+
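+/*
+ * Lazily create the shared nexus controller and register the
+ * "com.apple.nexus.utun.kpipe" kernel pipe provider. Calls are reference
+ * counted via utun_ncd_refcount under utun_lock.
+ */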
+static errno_t
+utun_register_kernel_pipe_nexus(void)
+{
+ nexus_attr_t nxa = NULL;
+ errno_t result;
+
+ lck_mtx_lock(&utun_lock);
+ if (utun_ncd_refcount++) {
+ lck_mtx_unlock(&utun_lock);
+ return 0;
+ }
+
+ result = kern_nexus_controller_create(&utun_ncd);
+ if (result) {
+ printf("%s: kern_nexus_controller_create failed: %d\n",
+ __FUNCTION__, result);
+ goto done;
+ }
+
+ uuid_t dom_prov;
+ result = kern_nexus_get_builtin_domain_provider(
+ NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
+ if (result) {
+ printf("%s: kern_nexus_get_builtin_domain_provider failed: %d\n",
+ __FUNCTION__, result);
+ goto done;
+ }
+
+ struct kern_nexus_provider_init prov_init = {
+ .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
+ .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
+ .nxpi_pre_connect = utun_nexus_pre_connect,
+ .nxpi_connected = utun_nexus_connected,
+ .nxpi_pre_disconnect = utun_nexus_pre_disconnect,
+ .nxpi_disconnected = utun_nexus_disconnected,
+ .nxpi_ring_init = utun_kpipe_ring_init,
+ .nxpi_ring_fini = utun_kpipe_ring_fini,
+ .nxpi_slot_init = NULL,
+ .nxpi_slot_fini = NULL,
+ .nxpi_sync_tx = utun_kpipe_sync_tx,
+ .nxpi_sync_rx = utun_kpipe_sync_rx,
+ .nxpi_tx_doorbell = NULL,
+ };
+
+ result = kern_nexus_attr_create(&nxa);
+ if (result) {
+ printf("%s: kern_nexus_attr_create failed: %d\n",
+ __FUNCTION__, result);
+ goto done;
+ }
+
+ uint64_t slot_buffer_size = UTUN_IF_DEFAULT_SLOT_SIZE;
+ result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
+ VERIFY(result == 0);
+
+ // Reset ring size for kernel pipe nexus to limit memory usage
+ uint64_t ring_size = if_utun_ring_size;
+ result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
+ VERIFY(result == 0);
+ result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
+ VERIFY(result == 0);
+
+ result = kern_nexus_controller_register_provider(utun_ncd,
+ dom_prov,
+ (const uint8_t *)"com.apple.nexus.utun.kpipe",
+ &prov_init,
+ sizeof(prov_init),
+ nxa,
+ &utun_kpipe_uuid);
+ if (result) {
+ printf("%s: kern_nexus_controller_register_provider failed: %d\n",
+ __FUNCTION__, result);
+ goto done;
+ }
+
+done:
+ if (nxa) {
+ kern_nexus_attr_destroy(nxa);
+ }
+
+ if (result) {
+ if (utun_ncd) {
+ kern_nexus_controller_destroy(utun_ncd);
+ utun_ncd = NULL;
+ }
+ utun_ncd_refcount = 0;
+ }
+
+ lck_mtx_unlock(&utun_lock);
+
+ return result;
+}
+
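+/*
+ * Release a reference on the kernel pipe nexus taken by
+ * utun_register_kernel_pipe_nexus().
+ */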
+static void
+utun_unregister_kernel_pipe_nexus(void)
+{
+ lck_mtx_lock(&utun_lock);
+
+ VERIFY(utun_ncd_refcount > 0);