+ setup_tunsock(1, NULL);
+}
+
+/* One test socket is created per traffic class, in this order (lowest to
+ * highest priority).  The trailing comment is the service-class group
+ * (BK/BE/VI/VO) each class belongs to.
+ */
+static const int SOCKET_TRAFFIC_CLASSES[] = {
+ SO_TC_BK_SYS, // BK
+ SO_TC_BK, // BK
+ SO_TC_BE, // BE
+ SO_TC_RD, // BE
+ SO_TC_OAM, // BE
+ SO_TC_AV, // VI
+ SO_TC_RV, // VI
+ SO_TC_VI, // VI
+ SO_TC_VO, // VO
+ SO_TC_CTL, // VO
+};
+
+// this should match ipsec_find_tx_ring_by_svc in ipsec driver
+/* Ring index expected for each entry of SOCKET_TRAFFIC_CLASSES above.
+ * Lower-priority classes map to higher-numbered rings.
+ */
+static const int SOCKET_TC_TO_RING[] = {
+ 3, // SO_TC_BK_SYS
+ 3, // SO_TC_BK
+ 2, // SO_TC_BE
+ 2, // SO_TC_RD
+ 2, // SO_TC_OAM
+ 1, // SO_TC_AV
+ 1, // SO_TC_RV
+ 1, // SO_TC_VI
+ 0, // SO_TC_VO
+ 0, // SO_TC_CTL
+};
+
+/* How many sockets map to this ring */
+/* Inverse tally of SOCKET_TC_TO_RING: entry ri is the number of traffic
+ * classes steered to ring ri (2 + 3 + 3 + 2 == number of entries in
+ * SOCKET_TRAFFIC_CLASSES).
+ */
+static const int RING_TO_TC_COUNT[] = {
+ 2, 3, 3, 2,
+};
+
+/* Create channel_count tunnels and open one channel per tunnel, fetching
+ * the first rx and/or tx ring as requested (either array may be NULL).
+ * Each channel fd is registered with kq for EVFILT_READ; udata carries
+ * the ring index so events can be mapped back to a tunnel.
+ */
+static void
+setup_channels_and_rings(int kq, int channel_count, channel_t channels[], channel_ring_t rxrings[], channel_ring_t txrings[], uuid_t uuids[], int cfds[])
+{
+	setup_tunsock(channel_count, uuids);
+
+#if 0
+	// give time to enable a tcpdump if desired
+	T_LOG("Sleeping 10");
+	sleep(10);
+	T_LOG("Done");
+#endif
+
+	for (int ri = 0; ri < channel_count; ri++) {
+		/* Create the channel exactly once, whether the caller wants rx
+		 * rings, tx rings, or both (previously it was created in each
+		 * branch, leaking the first channel when both were requested).
+		 */
+		T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(channels[ri] = os_channel_create(uuids[ri], 0), NULL);
+		if (rxrings) {
+			T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(rxrings[ri] = os_channel_rx_ring(channels[ri],
+			    os_channel_ring_id(channels[ri], CHANNEL_FIRST_RX_RING)), NULL);
+		}
+		if (txrings) {
+			/* Fixed copy-paste bug: this branch used to store an rx
+			 * ring into rxrings[ri] instead of a tx ring into txrings[ri].
+			 */
+			T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(txrings[ri] = os_channel_tx_ring(channels[ri],
+			    os_channel_ring_id(channels[ri], CHANNEL_FIRST_TX_RING)), NULL);
+		}
+
+		struct kevent kev;
+		T_QUIET; T_EXPECT_POSIX_SUCCESS(cfds[ri] = os_channel_get_fd(channels[ri]), NULL);
+		EV_SET(&kev, cfds[ri], EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, (void *)(uintptr_t)ri);
+		T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_ZERO(kevent(kq, &kev, 1, NULL, 0, NULL), NULL);
+	}
+}
+
+/* Tear down everything created by setup_channels_and_rings: clear the
+ * ring pointers (destroying the channel releases the rings), destroy
+ * each channel, and clear the tunnel uuids.
+ */
+static void
+cleanup_channels_and_rings(int channel_count, channel_t channels[], channel_ring_t rxrings[], channel_ring_t txrings[], uuid_t uuids[])
+{
+	for (int ri = 0; ri < channel_count; ri++) {
+		if (rxrings) {
+			rxrings[ri] = NULL;
+		}
+		if (txrings) {
+			/* Fixed copy-paste bug: this used to clear rxrings[ri]. */
+			txrings[ri] = NULL;
+		}
+		os_channel_destroy(channels[ri]);
+		channels[ri] = NULL;
+		uuid_clear(uuids[ri]);
+	}
+}
+
+/* Create one non-blocking socket per traffic class, tag it with the
+ * matching SO_TRAFFIC_CLASS so the driver can steer it to a tx ring,
+ * and bind it to g_addr1 (sin_port left 0, so the kernel picks an
+ * ephemeral port).  type is SOCK_DGRAM or SOCK_STREAM.
+ */
+static void
+setup_sockets(int sockets[SO_TC_MAX], int type)
+{
+ for (int si = 0; si < SO_TC_MAX; si++) {
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(sockets[si] = socket(PF_INET, type, 0), NULL);
+
+ // Traffic class si drives which ring this socket's packets land on.
+ T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_ZERO(setsockopt(sockets[si], SOL_SOCKET,
+ SO_TRAFFIC_CLASS, &SOCKET_TRAFFIC_CLASSES[si], sizeof(SOCKET_TRAFFIC_CLASSES[si])), NULL);
+
+ // XXX setsockopt(IP_BOUND_IF) here?
+
+ struct sockaddr_in sin;
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_len = sizeof(sin);
+ sin.sin_family = AF_INET;
+ sin.sin_addr = g_addr1;
+
+ T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_ZERO(bind(sockets[si], (struct sockaddr *)&sin, sizeof(sin)), NULL);
+
+ char sbuf[INET6_ADDRSTRLEN];
+ inet_ntop(sin.sin_family, &sin.sin_addr.s_addr, sbuf, sizeof(sbuf));
+#if 0
+ T_LOG("%s socket %d bound to %s port %d",
+ type == SOCK_DGRAM ? "udp" : type == SOCK_STREAM ? "tcp" : "???",
+ sockets[si], sbuf, ntohs(sin.sin_port));
+#endif
+ // Non-blocking so connect() below returns EINPROGRESS instead of hanging.
+ setblocking(sockets[si], false);
+ }
+}
+
+/* Close every per-traffic-class test socket and invalidate its slot. */
+static void
+cleanup_sockets(int sockets[SO_TC_MAX])
+{
+	for (int idx = 0; idx < SO_TC_MAX; idx++) {
+		int fd = sockets[idx];
+		T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_ZERO(close(fd), NULL);
+		sockets[idx] = -1;
+	}
+}
+
+/* Consume and hex-dump every available slot on rxring, then advance the
+ * ring past the last consumed slot so the slots are reclaimed.
+ */
+static void
+drain_ring(channel_ring_t rxring)
+{
+ uint32_t i, sc = os_channel_available_slot_count(rxring);
+ channel_slot_t rxprev = NULL;
+ for (i = 0; i < sc; i++) {
+ slot_prop_t rxprop;
+ channel_slot_t rxslot;
+
+ memset(&rxprop, 0, sizeof(rxprop));
+ // Every advertised slot must exist and carry a non-empty buffer.
+ T_QUIET; T_WITH_ERRNO; T_EXPECT_NOTNULL(rxslot = os_channel_get_next_slot(rxring, rxprev, &rxprop), NULL);
+ T_QUIET; T_ASSERT_NE_UINT(0, rxprop.sp_len, NULL);
+ T_QUIET; T_ASSERT_NOTNULL((void *)rxprop.sp_buf_ptr, NULL);
+
+ log_hexdump((void *)rxprop.sp_buf_ptr, rxprop.sp_len);
+
+ rxprev = rxslot;
+ }
+ if (sc) {
+ // One advance past the last slot returns all consumed slots at once.
+ T_QUIET; T_EXPECT_POSIX_ZERO(os_channel_advance_slot(rxring, rxprev), NULL);
+ }
+}
+
+/* Emit exactly one packet from socket s toward g_addr2 port 12345.
+ * SOCK_STREAM: a non-blocking connect() puts a SYN on the wire; the
+ * expected "failure" is EINPROGRESS.
+ * SOCK_DGRAM: sendto() of a sizeof(int) payload.
+ */
+static void
+send_one_packet(int s, int type)
+{
+	struct sockaddr_in sin;
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_len = sizeof(sin);
+	sin.sin_family = AF_INET;
+	sin.sin_addr = g_addr2;
+	sin.sin_port = htons(12345); /* host-to-network: htons, not ntohs (same value, correct idiom) */
+
+	if (type == SOCK_STREAM) {
+		T_QUIET; T_EXPECT_POSIX_FAILURE(connect(s, (struct sockaddr *)&sin, sizeof(sin)), EINPROGRESS, NULL);
+	}
+	if (type == SOCK_DGRAM) {
+		T_QUIET; T_WITH_ERRNO; T_EXPECT_EQ_LONG((long)sizeof(s), sendto(s, &s, sizeof(s), 0,
+		    (struct sockaddr *)&sin, sizeof(sin)), NULL);
+	}
+}
+
+/* Assert that no ring currently advertises any packets. */
+static void
+expect_empty_rings(int channel_count, channel_ring_t rings[])
+{
+	int idx = 0;
+	while (idx < channel_count) {
+		uint32_t avail = os_channel_available_slot_count(rings[idx]);
+		T_QUIET; T_EXPECT_EQ_UINT(0U, avail, NULL);
+		idx++;
+	}
+}
+
+/* For each traffic class in turn: send a single packet, wait for the
+ * kqueue to report exactly one channel readable, and verify the packet
+ * arrived on the ring SOCKET_TC_TO_RING predicts (ring 0 when there is
+ * only one channel) while every other ring stayed empty.
+ * channel_count matches the tunnel count; type is SOCK_DGRAM/SOCK_STREAM.
+ */
+static void
+xfer_1_packet_singly(int channel_count, int type)
+{
+ uuid_t uuids[channel_count];
+ channel_t channels[channel_count];
+ int sockets[SO_TC_MAX];
+ channel_ring_t rxrings[channel_count];
+ int cfds[channel_count];
+ int kq;
+
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(kq = kqueue(), NULL);
+
+ setup_channels_and_rings(kq, channel_count, channels, rxrings, NULL, uuids, cfds);
+
+ setup_sockets(sockets, type);
+
+ for (int si = 0; si < SO_TC_MAX; si++) {
+ // Rings were drained at the end of the previous iteration.
+ expect_empty_rings(channel_count, rxrings);
+
+ send_one_packet(sockets[si], type);
+
+ int expected_ring = channel_count == 1 ? 0 : SOCKET_TC_TO_RING[si];
+
+ /* Wait for the packet delivery and check that it's only one packet and on the correct ring */
+ struct kevent kev[channel_count + 1];
+ int nev;
+ memset(kev, 0, sizeof(kev));
+ struct timespec to = { 0, 100 * NSEC_PER_MSEC }; // 100 ms
+ T_QUIET; T_EXPECT_POSIX_SUCCESS(nev = kevent(kq, NULL, 0, kev, channel_count + 1, &to), NULL);
+ T_QUIET; T_EXPECT_EQ_INT(nev, 1, NULL);
+ // udata was stashed as the ring index by setup_channels_and_rings.
+ T_QUIET; T_EXPECT_EQ_PTR((void *)kev[0].ident, (void *)(uintptr_t)cfds[expected_ring], NULL);
+ T_QUIET; T_EXPECT_EQ_PTR(kev[0].udata, (void *)(uintptr_t)expected_ring, NULL);
+ T_QUIET; T_EXPECT_EQ_SHORT(kev[0].filter, (short)EVFILT_READ, NULL);
+ T_QUIET; T_EXPECT_FALSE(kev[0].flags & EV_ERROR, NULL);
+
+ /* Make sure it comes out the expected interface */
+ for (int ri = 0; ri < channel_count; ri++) {
+ errno = 0;
+
+ uint32_t sc = os_channel_available_slot_count(rxrings[ri]);
+
+ /* Check that the packet appears only on the expected ring and
+ * is the only packet on the expected ring.
+ */
+ // NOTE(review): (ri == expected_ring) is 0 or 1, so this relies on
+ // sc being exactly 0 or 1; a duplicate would trip the T_FAIL below.
+ T_QUIET; T_EXPECT_EQ_UINT(ri == expected_ring, sc, NULL);
+
+ if ((ri == expected_ring) == sc) {
+ T_PASS("tc index %d ring %d expected ring %d slot count %u", si, ri, expected_ring, sc);
+ } else {
+ T_FAIL("tc index %d ring %d expected ring %d slot count %u", si, ri, expected_ring, sc);
+ }
+
+ drain_ring(rxrings[ri]);
+ }
+ }
+
+ cleanup_sockets(sockets);
+
+ cleanup_channels_and_rings(channel_count, channels, rxrings, NULL, uuids);
+
+ T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_ZERO(close(kq), NULL);
+}
+
+/* Fixed typo in description: "sevice" -> "service". */
+T_DECL(ipsec35889979u1s, "transfers 1 packet at a time of each service class over udp to a single ring")
+{
+	setup_ipsec_test();
+	xfer_1_packet_singly(1, SOCK_DGRAM);
+}
+
+/* Fixed typo in description: "sevice" -> "service". */
+T_DECL(ipsec35889979u4s, "transfers 1 packet at a time of each service class over udp to 4 rings")
+{
+	setup_ipsec_test();
+	xfer_1_packet_singly(4, SOCK_DGRAM);
+}
+
+/* Fixed typo in description: "sevice" -> "service". */
+T_DECL(ipsec35889979t1s, "transfers 1 packet at a time of each service class over tcp to a single ring")
+{
+	setup_ipsec_test();
+	xfer_1_packet_singly(1, SOCK_STREAM);
+}
+
+
+/* Fixed typo in description: "sevice" -> "service". */
+T_DECL(ipsec35889979t4s, "transfers 1 packet at a time of each service class over tcp to 4 rings",
+    /* This test will fail because tcp syn packets get elevated
+     * due to ack prioritization
+     */
+    T_META_ENABLED(false))
+{
+	setup_ipsec_test();
+	xfer_1_packet_singly(4, SOCK_STREAM);
+}
+
+/* Send one packet on every traffic class back-to-back, then verify via
+ * one kevent() sweep that every ring fired exactly once and that each
+ * ring holds exactly the number of packets RING_TO_TC_COUNT predicts
+ * (all SO_TC_MAX packets on ring 0 when channel_count == 1).
+ */
+static void
+xfer_1_packet_together(int channel_count, int type)
+{
+	uuid_t uuids[channel_count];
+	channel_t channels[channel_count];
+	int sockets[SO_TC_MAX];
+	channel_ring_t rxrings[channel_count];
+	int cfds[channel_count];
+	int kq;
+
+	T_QUIET; T_EXPECT_POSIX_SUCCESS(kq = kqueue(), NULL);
+
+	setup_channels_and_rings(kq, channel_count, channels, rxrings, NULL, uuids, cfds);
+
+	setup_sockets(sockets, type);
+
+	for (int si = 0; si < SO_TC_MAX; si++) {
+		expect_empty_rings(channel_count, rxrings);
+
+		send_one_packet(sockets[si], type);
+	}
+
+	/* Sleep to make sure all packets get delivered */
+	struct timespec to = { 0, 100 * NSEC_PER_MSEC }; // 100 ms
+	nanosleep(&to, NULL);
+
+	/* Wait for the packet delivery and check that all rings event */
+	struct kevent kev[channel_count + 1];
+	int nev;
+	memset(kev, 0, sizeof(kev));
+	T_QUIET; T_EXPECT_POSIX_SUCCESS(nev = kevent(kq, NULL, 0, kev, channel_count + 1, &to), NULL);
+	T_QUIET; T_EXPECT_EQ_INT(nev, channel_count, NULL);
+
+	uint32_t found[channel_count];
+	memset(found, 0, sizeof(found));
+	for (int e = 0; e < nev; e++) {
+		T_LOG("kevent %lu filter 0x%4x flags 0x%04x fflags 0x%08x data %"PRIdPTR" udata %p",
+		    kev[e].ident, kev[e].filter, kev[e].flags, kev[e].fflags, kev[e].data, kev[e].udata);
+
+		/* udata was stashed as the ring index by setup_channels_and_rings. */
+		T_QUIET; T_ASSERT_GE_PTR(kev[e].udata, (void *)0, NULL);
+		T_QUIET; T_ASSERT_LT_PTR(kev[e].udata, (void *)(intptr_t)channel_count, NULL);
+		/* Fixed: go through uintptr_t; a direct pointer-to-int cast is a
+		 * truncating conversion on LP64 and draws a compiler diagnostic.
+		 */
+		int ri = (int)(uintptr_t)kev[e].udata;
+		T_QUIET; T_EXPECT_EQ_UINT(found[ri], 0U, NULL);
+
+		T_QUIET; T_EXPECT_EQ_ULONG(kev[e].ident, (uintptr_t)cfds[ri], NULL);
+		T_QUIET; T_EXPECT_EQ_SHORT(kev[e].filter, (short)EVFILT_READ, NULL);
+		T_QUIET; T_EXPECT_FALSE(kev[e].flags & EV_ERROR, NULL);
+
+		if (channel_count == 1) {
+			T_QUIET; T_EXPECT_EQ_LONG(kev[e].data, (long)SO_TC_MAX, NULL);
+		} else {
+			T_QUIET; T_EXPECT_EQ_LONG(kev[e].data, (long)RING_TO_TC_COUNT[ri], NULL);
+		}
+
+		found[ri] += (uint32_t)kev[e].data;
+	}
+	/* Check that something came out of all rings */
+	for (int ri = 0; ri < channel_count; ri++) {
+		T_QUIET; T_EXPECT_NE_UINT(found[ri], 0U, NULL);
+	}
+
+	/* Make sure it comes out the expected interface */
+	for (int ri = 0; ri < channel_count; ri++) {
+		uint32_t sc = os_channel_available_slot_count(rxrings[ri]);
+		if (channel_count == 1) {
+			/* Fixed: cast to uint32_t — SO_TC_MAX is an int enumerator,
+			 * and %PRIu32 requires a uint32_t argument.
+			 */
+			if (sc == (uint32_t)SO_TC_MAX) {
+				T_PASS("ring %d got %"PRIu32" slots expecting %"PRIu32"", ri, sc, (uint32_t)SO_TC_MAX);
+			} else {
+				T_FAIL("ring %d got %"PRIu32" slots expecting %"PRIu32"", ri, sc, (uint32_t)SO_TC_MAX);
+			}
+		} else {
+			if (sc == (uint32_t)RING_TO_TC_COUNT[ri]) {
+				T_PASS("ring %d got %"PRIu32" slots expecting %"PRIu32"", ri, sc, (uint32_t)RING_TO_TC_COUNT[ri]);
+			} else {
+				T_FAIL("ring %d got %"PRIu32" slots expecting %"PRIu32"", ri, sc, (uint32_t)RING_TO_TC_COUNT[ri]);
+			}
+		}
+
+		drain_ring(rxrings[ri]);
+	}
+
+	cleanup_sockets(sockets);
+
+	cleanup_channels_and_rings(channel_count, channels, rxrings, NULL, uuids);
+
+	T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_ZERO(close(kq), NULL);
+}
+
+/* Fixed typo in description: "sevice" -> "service". */
+T_DECL(ipsec35889979u1m, "transfers 1 packet together of each service class over udp to a single ring")
+{
+	setup_ipsec_test();
+	xfer_1_packet_together(1, SOCK_DGRAM);
+}
+
+/* Fixed typo in description: "sevice" -> "service". */
+T_DECL(ipsec35889979u4m, "transfers 1 packet together of each service class over udp to 4 rings")
+{
+	setup_ipsec_test();
+	xfer_1_packet_together(4, SOCK_DGRAM);
+}
+
+/* Fixed typo in description: "sevice" -> "service". */
+T_DECL(ipsec35889979t1m, "transfers 1 packet together of each service class over tcp to a single ring")
+{
+	setup_ipsec_test();
+	xfer_1_packet_together(1, SOCK_STREAM);
+}
+
+T_DECL(ipsec35889979t4m, "transfers 1 packet together of each sevice class over tcp to 4 rings",
+ /* This test will fail because tcp syn packets get elevated
+ * due to ack prioritization
+ */
+ T_META_ENABLED(false))
+{
+ setup_ipsec_test();
+ xfer_1_packet_together(4, SOCK_STREAM);