]> git.saurik.com Git - apple/xnu.git/blame - bsd/net/pktsched/pktsched_tcq.c
xnu-6153.141.1.tar.gz
[apple/xnu.git] / bsd / net / pktsched / pktsched_tcq.c
CommitLineData
316670eb 1/*
a39ff7e2 2 * Copyright (c) 2011-2018 Apple Inc. All rights reserved.
316670eb
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * traffic class queue
31 */
32
33#include <sys/cdefs.h>
34#include <sys/param.h>
35#include <sys/malloc.h>
36#include <sys/mbuf.h>
37#include <sys/systm.h>
38#include <sys/errno.h>
39#include <sys/kernel.h>
40#include <sys/syslog.h>
41
42#include <kern/zalloc.h>
43
44#include <net/if.h>
45#include <net/net_osdep.h>
46
47#include <net/pktsched/pktsched_tcq.h>
48#include <netinet/in.h>
49
5ba3f43e 50
316670eb
A
51/*
52 * function prototypes
53 */
cb323159
A
54static int tcq_enqueue_ifclassq(struct ifclassq *, classq_pkt_t *, boolean_t *);
55static void tcq_dequeue_tc_ifclassq(struct ifclassq *, mbuf_svc_class_t,
56 classq_pkt_t *);
316670eb
A
57static int tcq_request_ifclassq(struct ifclassq *, cqrq_t, void *);
58static int tcq_clear_interface(struct tcq_if *);
59static struct tcq_class *tcq_class_create(struct tcq_if *, int, u_int32_t,
5ba3f43e 60 int, u_int32_t, classq_pkt_type_t);
316670eb
A
61static int tcq_class_destroy(struct tcq_if *, struct tcq_class *);
62static int tcq_destroy_locked(struct tcq_if *);
5ba3f43e 63static inline int tcq_addq(struct tcq_class *, pktsched_pkt_t *,
316670eb 64 struct pf_mtag *);
5ba3f43e 65static inline void tcq_getq(struct tcq_class *, pktsched_pkt_t *);
316670eb
A
66static void tcq_purgeq(struct tcq_if *, struct tcq_class *, u_int32_t,
67 u_int32_t *, u_int32_t *);
68static void tcq_purge_sc(struct tcq_if *, cqrq_purge_sc_t *);
69static void tcq_updateq(struct tcq_if *, struct tcq_class *, cqev_t);
70static int tcq_throttle(struct tcq_if *, cqrq_throttle_t *);
71static int tcq_resumeq(struct tcq_if *, struct tcq_class *);
72static int tcq_suspendq(struct tcq_if *, struct tcq_class *);
39236c6e 73static int tcq_stat_sc(struct tcq_if *, cqrq_stat_sc_t *);
5ba3f43e
A
74static void tcq_dequeue_cl(struct tcq_if *, struct tcq_class *,
75 mbuf_svc_class_t, pktsched_pkt_t *);
316670eb
A
76static inline struct tcq_class *tcq_clh_to_clp(struct tcq_if *, u_int32_t);
77static const char *tcq_style(struct tcq_if *);
78
0a7de745
A
79#define TCQ_ZONE_MAX 32 /* maximum elements in zone */
80#define TCQ_ZONE_NAME "pktsched_tcq" /* zone name */
316670eb 81
0a7de745
A
82static unsigned int tcq_size; /* size of zone element */
83static struct zone *tcq_zone; /* zone for tcq */
316670eb 84
0a7de745
A
85#define TCQ_CL_ZONE_MAX 32 /* maximum elements in zone */
86#define TCQ_CL_ZONE_NAME "pktsched_tcq_cl" /* zone name */
316670eb 87
0a7de745
A
88static unsigned int tcq_cl_size; /* size of zone element */
89static struct zone *tcq_cl_zone; /* zone for tcq_class */
316670eb
A
90
91void
92tcq_init(void)
93{
0a7de745 94 tcq_size = sizeof(struct tcq_if);
316670eb
A
95 tcq_zone = zinit(tcq_size, TCQ_ZONE_MAX * tcq_size,
96 0, TCQ_ZONE_NAME);
97 if (tcq_zone == NULL) {
98 panic("%s: failed allocating %s", __func__, TCQ_ZONE_NAME);
99 /* NOTREACHED */
100 }
101 zone_change(tcq_zone, Z_EXPAND, TRUE);
102 zone_change(tcq_zone, Z_CALLERACCT, TRUE);
103
0a7de745 104 tcq_cl_size = sizeof(struct tcq_class);
316670eb
A
105 tcq_cl_zone = zinit(tcq_cl_size, TCQ_CL_ZONE_MAX * tcq_cl_size,
106 0, TCQ_CL_ZONE_NAME);
107 if (tcq_cl_zone == NULL) {
108 panic("%s: failed allocating %s", __func__, TCQ_CL_ZONE_NAME);
109 /* NOTREACHED */
110 }
111 zone_change(tcq_cl_zone, Z_EXPAND, TRUE);
112 zone_change(tcq_cl_zone, Z_CALLERACCT, TRUE);
113}
114
115struct tcq_if *
5ba3f43e 116tcq_alloc(struct ifnet *ifp, int how)
316670eb 117{
0a7de745 118 struct tcq_if *tif;
316670eb
A
119
120 tif = (how == M_WAITOK) ? zalloc(tcq_zone) : zalloc_noblock(tcq_zone);
0a7de745
A
121 if (tif == NULL) {
122 return NULL;
123 }
316670eb
A
124
125 bzero(tif, tcq_size);
126 tif->tif_maxpri = -1;
127 tif->tif_ifq = &ifp->if_snd;
316670eb
A
128
129 if (pktsched_verbose) {
130 log(LOG_DEBUG, "%s: %s scheduler allocated\n",
131 if_name(ifp), tcq_style(tif));
132 }
133
0a7de745 134 return tif;
316670eb
A
135}
136
137int
138tcq_destroy(struct tcq_if *tif)
139{
140 struct ifclassq *ifq = tif->tif_ifq;
141 int err;
142
143 IFCQ_LOCK(ifq);
144 err = tcq_destroy_locked(tif);
145 IFCQ_UNLOCK(ifq);
146
0a7de745 147 return err;
316670eb
A
148}
149
/*
 * Tear down a TCQ scheduler instance with the ifclassq lock already
 * held: destroy every class, then free the tcq_if back to its zone.
 * Always returns 0.
 */
static int
tcq_destroy_locked(struct tcq_if *tif)
{
	IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);

	/* tcq_clear_interface() always returns 0; result ignored */
	(void) tcq_clear_interface(tif);

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s scheduler destroyed\n",
		    if_name(TCQIF_IFP(tif)), tcq_style(tif));
	}

	zfree(tcq_zone, tif);

	return 0;
}
166
167/*
168 * bring the interface back to the initial state by discarding
169 * all the filters and classes.
170 */
171static int
172tcq_clear_interface(struct tcq_if *tif)
173{
0a7de745 174 struct tcq_class *cl;
316670eb
A
175 int pri;
176
177 IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
178
179 /* clear out the classes */
0a7de745
A
180 for (pri = 0; pri <= tif->tif_maxpri; pri++) {
181 if ((cl = tif->tif_classes[pri]) != NULL) {
316670eb 182 tcq_class_destroy(tif, cl);
0a7de745
A
183 }
184 }
316670eb 185
0a7de745 186 return 0;
316670eb
A
187}
188
189/* discard all the queued packets on the interface */
190void
191tcq_purge(struct tcq_if *tif)
192{
193 struct tcq_class *cl;
194 int pri;
195
196 IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
197
198 for (pri = 0; pri <= tif->tif_maxpri; pri++) {
0a7de745 199 if ((cl = tif->tif_classes[pri]) != NULL && !qempty(&cl->cl_q)) {
316670eb 200 tcq_purgeq(tif, cl, 0, NULL, NULL);
0a7de745 201 }
316670eb 202 }
316670eb 203 VERIFY(IFCQ_LEN(tif->tif_ifq) == 0);
316670eb
A
204}
205
206static void
207tcq_purge_sc(struct tcq_if *tif, cqrq_purge_sc_t *pr)
208{
209 struct ifclassq *ifq = tif->tif_ifq;
210 u_int32_t i;
211
212 IFCQ_LOCK_ASSERT_HELD(ifq);
213
214 VERIFY(pr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(pr->sc));
215 VERIFY(pr->flow != 0);
216
217 if (pr->sc != MBUF_SC_UNSPEC) {
218 i = MBUF_SCIDX(pr->sc);
219 VERIFY(i < IFCQ_SC_MAX);
220
221 tcq_purgeq(tif, ifq->ifcq_disc_slots[i].cl,
222 pr->flow, &pr->packets, &pr->bytes);
223 } else {
224 u_int32_t cnt, len;
225
226 pr->packets = 0;
227 pr->bytes = 0;
228
229 for (i = 0; i < IFCQ_SC_MAX; i++) {
230 tcq_purgeq(tif, ifq->ifcq_disc_slots[i].cl,
231 pr->flow, &cnt, &len);
232 pr->packets += cnt;
233 pr->bytes += len;
234 }
235 }
236}
237
238void
239tcq_event(struct tcq_if *tif, cqev_t ev)
240{
241 struct tcq_class *cl;
242 int pri;
243
244 IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
245
0a7de745
A
246 for (pri = 0; pri <= tif->tif_maxpri; pri++) {
247 if ((cl = tif->tif_classes[pri]) != NULL) {
316670eb 248 tcq_updateq(tif, cl, ev);
0a7de745
A
249 }
250 }
316670eb
A
251}
252
/*
 * Create a new class at `priority' with handle `qid' and attach it to
 * the scheduler.  On success the new class is optionally returned via
 * *clp.  Fails with EINVAL for an out-of-range priority, EBUSY if the
 * priority slot or the qid is already taken, ENOMEM on allocation
 * failure.
 */
int
tcq_add_queue(struct tcq_if *tif, int priority, u_int32_t qlimit,
    int flags, u_int32_t qid, struct tcq_class **clp, classq_pkt_type_t ptype)
{
	struct tcq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);

	/* check parameters */
	if (priority >= TCQ_MAXPRI) {
		return EINVAL;
	}
	if (tif->tif_classes[priority] != NULL) {
		return EBUSY;
	}
	if (tcq_clh_to_clp(tif, qid) != NULL) {
		return EBUSY;
	}

	cl = tcq_class_create(tif, priority, qlimit, flags, qid, ptype);
	if (cl == NULL) {
		return ENOMEM;
	}

	if (clp != NULL) {
		*clp = cl;
	}

	return 0;
}
283
/*
 * Create (or reconfigure) the class at priority `pri'.  If a class
 * already occupies the slot it is reset in place: its queue is purged,
 * any SFB state destroyed, and the queue reverted to droptail before
 * the new parameters are applied.  A qlimit of 0 (or one exceeding the
 * ifclassq limit) falls back to the interface limit, then to
 * DEFAULT_QLIMIT.  When TQCF_SFB is requested the SFB instance is
 * allocated immediately unless TQCF_LAZY defers it to first enqueue.
 * Returns the class, or NULL on allocation failure.
 */
static struct tcq_class *
tcq_class_create(struct tcq_if *tif, int pri, u_int32_t qlimit,
    int flags, u_int32_t qid, classq_pkt_type_t ptype)
{
	struct ifnet *ifp;
	struct ifclassq *ifq;
	struct tcq_class *cl;

	IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);

	ifq = tif->tif_ifq;
	ifp = TCQIF_IFP(tif);

	if ((cl = tif->tif_classes[pri]) != NULL) {
		/* modify the class instead of creating a new one */
		if (!qempty(&cl->cl_q)) {
			tcq_purgeq(tif, cl, 0, NULL, NULL);
		}

		if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
			sfb_destroy(cl->cl_sfb);
		}
		cl->cl_qalg.ptr = NULL;
		qtype(&cl->cl_q) = Q_DROPTAIL;
		qstate(&cl->cl_q) = QS_RUNNING;
		/* packet type of an existing queue must not change */
		VERIFY(qptype(&cl->cl_q) == ptype);
	} else {
		cl = zalloc(tcq_cl_zone);
		if (cl == NULL) {
			return NULL;
		}

		bzero(cl, tcq_cl_size);
	}

	tif->tif_classes[pri] = cl;
	if (flags & TQCF_DEFAULTCLASS) {
		tif->tif_default = cl;
	}
	if (qlimit == 0 || qlimit > IFCQ_MAXLEN(ifq)) {
		qlimit = IFCQ_MAXLEN(ifq);
		if (qlimit == 0) {
			qlimit = DEFAULT_QLIMIT;  /* use default */
		}
	}
	_qinit(&cl->cl_q, Q_DROPTAIL, qlimit, ptype);
	cl->cl_flags = flags;
	cl->cl_pri = pri;
	if (pri > tif->tif_maxpri) {
		tif->tif_maxpri = pri;
	}
	cl->cl_tif = tif;
	cl->cl_handle = qid;

	if (flags & TQCF_SFB) {
		/* translate TQCF_* algorithm flags to SFBF_* */
		cl->cl_qflags = 0;
		if (flags & TQCF_ECN) {
			cl->cl_qflags |= SFBF_ECN;
		}
		if (flags & TQCF_FLOWCTL) {
			cl->cl_qflags |= SFBF_FLOWCTL;
		}
		if (flags & TQCF_DELAYBASED) {
			cl->cl_qflags |= SFBF_DELAYBASED;
		}
		if (!(cl->cl_flags & TQCF_LAZY)) {
			cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
			    qlimit(&cl->cl_q), cl->cl_qflags);
		}
		/*
		 * Mark the queue Q_SFB even when the SFB instance is
		 * deferred (TQCF_LAZY); tcq_addq() allocates it on the
		 * first enqueue in that case.
		 */
		if (cl->cl_sfb != NULL || (cl->cl_flags & TQCF_LAZY)) {
			qtype(&cl->cl_q) = Q_SFB;
		}
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s created qid=%d pri=%d qlimit=%d "
		    "flags=%b\n", if_name(ifp), tcq_style(tif),
		    cl->cl_handle, cl->cl_pri, qlimit, flags, TQCF_BITS);
	}

	return cl;
}
366
367int
368tcq_remove_queue(struct tcq_if *tif, u_int32_t qid)
369{
370 struct tcq_class *cl;
371
372 IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
373
0a7de745
A
374 if ((cl = tcq_clh_to_clp(tif, qid)) == NULL) {
375 return EINVAL;
376 }
316670eb 377
0a7de745 378 return tcq_class_destroy(tif, cl);
316670eb
A
379}
380
/*
 * Destroy a class: purge its queue, detach it from the scheduler's
 * priority table, recompute tif_maxpri if this was the highest class,
 * clear the default-class pointer if it pointed here, release any SFB
 * state, and free the class.  Always returns 0.
 */
static int
tcq_class_destroy(struct tcq_if *tif, struct tcq_class *cl)
{
	struct ifclassq *ifq = tif->tif_ifq;
	int pri;
#if !MACH_ASSERT
#pragma unused(ifq)
#endif
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!qempty(&cl->cl_q)) {
		tcq_purgeq(tif, cl, 0, NULL, NULL);
	}

	tif->tif_classes[cl->cl_pri] = NULL;
	if (tif->tif_maxpri == cl->cl_pri) {
		/* find the next-highest occupied priority slot */
		for (pri = cl->cl_pri; pri >= 0; pri--) {
			if (tif->tif_classes[pri] != NULL) {
				tif->tif_maxpri = pri;
				break;
			}
		}
		if (pri < 0) {
			/* no classes left */
			tif->tif_maxpri = -1;
		}
	}

	if (tif->tif_default == cl) {
		tif->tif_default = NULL;
	}

	if (cl->cl_qalg.ptr != NULL) {
		if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
			sfb_destroy(cl->cl_sfb);
		}
		cl->cl_qalg.ptr = NULL;
		qtype(&cl->cl_q) = Q_DROPTAIL;
		qstate(&cl->cl_q) = QS_RUNNING;
	}

	if (pktsched_verbose) {
		log(LOG_DEBUG, "%s: %s destroyed qid=%d pri=%d\n",
		    if_name(TCQIF_IFP(tif)), tcq_style(tif),
		    cl->cl_handle, cl->cl_pri);
	}

	zfree(tcq_cl_zone, cl);
	return 0;
}
430
/*
 * Enqueue a packet into class `cl' (or, when cl is NULL, into the
 * class with handle 0, falling back to the default class).  On a
 * CLASSQEQ_DROP* result from tcq_addq() the drop counters are charged
 * here and the code is returned; otherwise the ifclassq packet/byte
 * lengths are incremented.  Returns 0, CLASSQEQ_SUCCESS_FC, or a
 * CLASSQEQ_DROP* code; the caller is responsible for freeing the
 * packet on a drop.
 */
int
tcq_enqueue(struct tcq_if *tif, struct tcq_class *cl, pktsched_pkt_t *pkt,
    struct pf_mtag *t)
{
	struct ifclassq *ifq = tif->tif_ifq;
	int len, ret;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(cl == NULL || cl->cl_tif == tif);

	if (cl == NULL) {
		cl = tcq_clh_to_clp(tif, 0);
		if (cl == NULL) {
			cl = tif->tif_default;
			if (cl == NULL) {
				/* no class to queue on: drop */
				IFCQ_CONVERT_LOCK(ifq);
				return CLASSQEQ_DROP;
			}
		}
	}

	VERIFY(pkt->pktsched_ptype == qptype(&cl->cl_q));
	/* capture length before tcq_addq() may consume the packet */
	len = pktsched_get_pkt_len(pkt);

	ret = tcq_addq(cl, pkt, t);
	if ((ret != 0) && (ret != CLASSQEQ_SUCCESS_FC)) {
		VERIFY(ret == CLASSQEQ_DROP ||
		    ret == CLASSQEQ_DROP_FC ||
		    ret == CLASSQEQ_DROP_SP);
		PKTCNTR_ADD(&cl->cl_dropcnt, 1, len);
		IFCQ_DROP_ADD(ifq, 1, len);
		return ret;
	}
	IFCQ_INC_LEN(ifq);
	IFCQ_INC_BYTES(ifq, len);

	/* successfully queued. */
	return ret;
}
470
471/*
472 * note: CLASSQDQ_POLL returns the next packet without removing the packet
473 * from the queue. CLASSQDQ_REMOVE is a normal dequeue operation.
474 * CLASSQDQ_REMOVE must return the same packet if called immediately
475 * after CLASSQDQ_POLL.
476 */
5ba3f43e
A
477void
478tcq_dequeue_tc(struct tcq_if *tif, mbuf_svc_class_t sc, pktsched_pkt_t *pkt)
316670eb 479{
5ba3f43e 480 tcq_dequeue_cl(tif, NULL, sc, pkt);
316670eb
A
481}
482
5ba3f43e
A
/*
 * Dequeue the head packet of class `cl' (looked up from `sc' when cl
 * is NULL) into *pkt.  On success the ifclassq packet/byte counters
 * are decremented and transmit statistics charged; when nothing is
 * dequeued, pkt->pktsched_pkt_mbuf is left NULL to signal "empty" to
 * the caller.
 */
static void
tcq_dequeue_cl(struct tcq_if *tif, struct tcq_class *cl, mbuf_svc_class_t sc,
    pktsched_pkt_t *pkt)
{
	struct ifclassq *ifq = tif->tif_ifq;
	uint32_t len;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	pkt->pktsched_pkt_mbuf = NULL;

	if (cl == NULL) {
		cl = tcq_clh_to_clp(tif, MBUF_SCIDX(sc));
		if (cl == NULL) {
			return;
		}
	}

	if (qempty(&cl->cl_q)) {
		return;
	}

	VERIFY(!IFCQ_IS_EMPTY(ifq));

	tcq_getq(cl, pkt);
	/* tcq_getq() may come back empty (e.g. SFB had nothing eligible) */
	if (pkt->pktsched_pkt_mbuf != NULL) {
		len = pktsched_get_pkt_len(pkt);
		IFCQ_DEC_LEN(ifq);
		IFCQ_DEC_BYTES(ifq, len);
		if (qempty(&cl->cl_q)) {
			/* queue drained: close out this busy period */
			cl->cl_period++;
		}
		PKTCNTR_ADD(&cl->cl_xmitcnt, 1, len);
		IFCQ_XMIT_ADD(ifq, 1, len);
	}
}
518
519static inline int
5ba3f43e 520tcq_addq(struct tcq_class *cl, pktsched_pkt_t *pkt, struct pf_mtag *t)
316670eb
A
521{
522 struct tcq_if *tif = cl->cl_tif;
523 struct ifclassq *ifq = tif->tif_ifq;
524
525 IFCQ_LOCK_ASSERT_HELD(ifq);
526
316670eb
A
527 if (q_is_sfb(&cl->cl_q)) {
528 if (cl->cl_sfb == NULL) {
529 struct ifnet *ifp = TCQIF_IFP(tif);
530
531 VERIFY(cl->cl_flags & TQCF_LAZY);
532 cl->cl_flags &= ~TQCF_LAZY;
533 IFCQ_CONVERT_LOCK(ifq);
534
535 cl->cl_sfb = sfb_alloc(ifp, cl->cl_handle,
536 qlimit(&cl->cl_q), cl->cl_qflags);
537 if (cl->cl_sfb == NULL) {
538 /* fall back to droptail */
539 qtype(&cl->cl_q) = Q_DROPTAIL;
540 cl->cl_flags &= ~TQCF_SFB;
541 cl->cl_qflags &= ~(SFBF_ECN | SFBF_FLOWCTL);
542
543 log(LOG_ERR, "%s: %s SFB lazy allocation "
544 "failed for qid=%d pri=%d, falling back "
545 "to DROPTAIL\n", if_name(ifp),
546 tcq_style(tif), cl->cl_handle,
547 cl->cl_pri);
548 } else if (tif->tif_throttle != IFNET_THROTTLE_OFF) {
549 /* if there's pending throttling, set it */
550 cqrq_throttle_t tr = { 1, tif->tif_throttle };
551 int err = tcq_throttle(tif, &tr);
552
0a7de745 553 if (err == EALREADY) {
316670eb 554 err = 0;
0a7de745 555 }
316670eb
A
556 if (err != 0) {
557 tr.level = IFNET_THROTTLE_OFF;
558 (void) tcq_throttle(tif, &tr);
559 }
560 }
561 }
0a7de745
A
562 if (cl->cl_sfb != NULL) {
563 return sfb_addq(cl->cl_sfb, &cl->cl_q, pkt, t);
564 }
316670eb
A
565 } else if (qlen(&cl->cl_q) >= qlimit(&cl->cl_q)) {
566 IFCQ_CONVERT_LOCK(ifq);
0a7de745 567 return CLASSQEQ_DROP;
316670eb
A
568 }
569
39236c6e 570#if PF_ECN
0a7de745 571 if (cl->cl_flags & TQCF_CLEARDSCP) {
a39ff7e2 572 /* not supported for non-BSD stack packets */
5ba3f43e 573 VERIFY(pkt->pktsched_ptype == QP_MBUF);
0a7de745
A
574 }
575 write_dsfield(m, t, 0);
39236c6e 576#endif /* PF_ECN */
316670eb 577
5ba3f43e 578 VERIFY(pkt->pktsched_ptype == qptype(&cl->cl_q));
cb323159 579 _addq(&cl->cl_q, &pkt->pktsched_pkt);
316670eb 580
0a7de745 581 return 0;
316670eb
A
582}
583
5ba3f43e
A
/*
 * Pull the head packet of the class queue into *pkt, delegating to
 * sfb_getq() when SFB manages the queue, otherwise doing a plain
 * droptail _getq() and wrapping the result via pktsched_pkt_encap().
 */
static inline void
tcq_getq(struct tcq_class *cl, pktsched_pkt_t *pkt)
{
	classq_pkt_t p = CLASSQ_PKT_INITIALIZER(p);

	IFCQ_LOCK_ASSERT_HELD(cl->cl_tif->tif_ifq);

	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
		return sfb_getq(cl->cl_sfb, &cl->cl_q, pkt);
	}

	_getq(&cl->cl_q, &p);
	return pktsched_pkt_encap(pkt, &p);
}
598
/*
 * Drop all packets belonging to `flow' from the class queue (flow 0
 * drops everything), charging drop statistics and shrinking the
 * aggregate ifclassq length.  The number of packets/bytes dropped is
 * optionally reported via *packets / *bytes (set even when nothing
 * was queued).
 */
static void
tcq_purgeq(struct tcq_if *tif, struct tcq_class *cl, u_int32_t flow,
    u_int32_t *packets, u_int32_t *bytes)
{
	struct ifclassq *ifq = tif->tif_ifq;
	u_int32_t cnt = 0, len = 0, qlen;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if ((qlen = qlen(&cl->cl_q)) == 0) {
		goto done;
	}

	/* freeing packets may block; convert to exclusive/thread context */
	IFCQ_CONVERT_LOCK(ifq);
	if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
		sfb_purgeq(cl->cl_sfb, &cl->cl_q, flow, &cnt, &len);
	} else {
		_flushq_flow(&cl->cl_q, flow, &cnt, &len);
	}

	if (cnt > 0) {
		VERIFY(qlen(&cl->cl_q) == (qlen - cnt));

		PKTCNTR_ADD(&cl->cl_dropcnt, cnt, len);
		IFCQ_DROP_ADD(ifq, cnt, len);

		VERIFY(((signed)IFCQ_LEN(ifq) - cnt) >= 0);
		IFCQ_LEN(ifq) -= cnt;

		if (pktsched_verbose) {
			log(LOG_DEBUG, "%s: %s purge qid=%d pri=%d "
			    "qlen=[%d,%d] cnt=%d len=%d flow=0x%x\n",
			    if_name(TCQIF_IFP(tif)), tcq_style(tif),
			    cl->cl_handle, cl->cl_pri, qlen, qlen(&cl->cl_q),
			    cnt, len, flow);
		}
	}
done:
	if (packets != NULL) {
		*packets = cnt;
	}
	if (bytes != NULL) {
		*bytes = len;
	}
}
644
645static void
646tcq_updateq(struct tcq_if *tif, struct tcq_class *cl, cqev_t ev)
647{
648 IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
649
650 if (pktsched_verbose) {
651 log(LOG_DEBUG, "%s: %s update qid=%d pri=%d event=%s\n",
652 if_name(TCQIF_IFP(tif)), tcq_style(tif),
653 cl->cl_handle, cl->cl_pri, ifclassq_ev2str(ev));
654 }
655
0a7de745
A
656 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
657 return sfb_updateq(cl->cl_sfb, ev);
658 }
316670eb
A
659}
660
661int
662tcq_get_class_stats(struct tcq_if *tif, u_int32_t qid,
663 struct tcq_classstats *sp)
664{
665 struct tcq_class *cl;
666
667 IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
668
0a7de745
A
669 if ((cl = tcq_clh_to_clp(tif, qid)) == NULL) {
670 return EINVAL;
671 }
316670eb
A
672
673 sp->class_handle = cl->cl_handle;
674 sp->priority = cl->cl_pri;
675 sp->qlength = qlen(&cl->cl_q);
676 sp->qlimit = qlimit(&cl->cl_q);
677 sp->period = cl->cl_period;
678 sp->xmitcnt = cl->cl_xmitcnt;
679 sp->dropcnt = cl->cl_dropcnt;
680
681 sp->qtype = qtype(&cl->cl_q);
682 sp->qstate = qstate(&cl->cl_q);
5ba3f43e 683
0a7de745 684 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
316670eb 685 sfb_getstats(cl->cl_sfb, &sp->sfb);
0a7de745 686 }
316670eb 687
0a7de745 688 return 0;
316670eb
A
689}
690
39236c6e
A
691static int
692tcq_stat_sc(struct tcq_if *tif, cqrq_stat_sc_t *sr)
693{
694 struct ifclassq *ifq = tif->tif_ifq;
695 struct tcq_class *cl;
696 u_int32_t i;
697
698 IFCQ_LOCK_ASSERT_HELD(ifq);
699
700 VERIFY(sr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(sr->sc));
701
702 i = MBUF_SCIDX(sr->sc);
703 VERIFY(i < IFCQ_SC_MAX);
704
705 cl = ifq->ifcq_disc_slots[i].cl;
706 sr->packets = qlen(&cl->cl_q);
707 sr->bytes = qsize(&cl->cl_q);
708
0a7de745 709 return 0;
39236c6e
A
710}
711
316670eb
A
712/* convert a class handle to the corresponding class pointer */
713static inline struct tcq_class *
714tcq_clh_to_clp(struct tcq_if *tif, u_int32_t chandle)
715{
716 struct tcq_class *cl;
717 int idx;
718
719 IFCQ_LOCK_ASSERT_HELD(tif->tif_ifq);
720
0a7de745 721 for (idx = tif->tif_maxpri; idx >= 0; idx--) {
316670eb 722 if ((cl = tif->tif_classes[idx]) != NULL &&
0a7de745
A
723 cl->cl_handle == chandle) {
724 return cl;
725 }
726 }
316670eb 727
0a7de745 728 return NULL;
316670eb
A
729}
730
/* human-readable name of this scheduler, used in log messages */
static const char *
tcq_style(struct tcq_if *tif)
{
#pragma unused(tif)
	static const char *sched_name = "TCQ";

	return sched_name;
}
737
/*
 * tcq_enqueue_ifclassq is an enqueue function to be registered to
 * (*ifcq_enqueue) in struct ifclassq.  It maps the packet's service
 * class to a cached class slot, hands the packet to tcq_enqueue(),
 * and translates the CLASSQEQ_* result into an errno-style return
 * (0, ENOBUFS, EQFULL, or EQSUSPENDED).  *pdrop reports whether the
 * packet was dropped (and freed) here.
 */
static int
tcq_enqueue_ifclassq(struct ifclassq *ifq, classq_pkt_t *p, boolean_t *pdrop)
{
	u_int32_t i = 0;
	int ret;
	pktsched_pkt_t pkt;
	struct pf_mtag *t = NULL;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (p->cp_ptype == QP_MBUF) {
		struct mbuf *m = p->cp_mbuf;
		if (!(m->m_flags & M_PKTHDR)) {
			/* should not happen */
			log(LOG_ERR, "%s: packet does not have pkthdr\n",
			    if_name(ifq->ifcq_ifp));
			IFCQ_CONVERT_LOCK(ifq);
			m_freem(m);
			*p = CLASSQ_PKT_INITIALIZER(*p);
			*pdrop = TRUE;
			return ENOBUFS;
		}
		t = m_pftag(m);
		i = MBUF_SCIDX(mbuf_get_service_class(m));
	}
	/* non-mbuf packets use slot 0; NOTE(review): confirm intended */
	VERIFY((u_int32_t)i < IFCQ_SC_MAX);

	pktsched_pkt_encap(&pkt, p);

	ret = tcq_enqueue(ifq->ifcq_disc,
	    ifq->ifcq_disc_slots[i].cl, &pkt, t);

	if ((ret != 0) && (ret != CLASSQEQ_SUCCESS_FC)) {
		/* packet has been compressed into pkt; free it here */
		pktsched_free_pkt(&pkt);
		*pdrop = TRUE;
	} else {
		*pdrop = FALSE;
	}

	/* map scheduler result codes onto errno-style values */
	switch (ret) {
	case CLASSQEQ_DROP:
		ret = ENOBUFS;
		break;
	case CLASSQEQ_DROP_FC:
		ret = EQFULL;
		break;
	case CLASSQEQ_DROP_SP:
		ret = EQSUSPENDED;
		break;
	case CLASSQEQ_SUCCESS_FC:
		ret = EQFULL;
		break;
	case CLASSQEQ_SUCCESS:
		ret = 0;
		break;
	default:
		VERIFY(0);
		__builtin_unreachable();
	}
	return ret;
}
803
804/*
805 * tcq_dequeue_tc_ifclassq is a dequeue function to be registered to
806 * (*ifcq_dequeue) in struct ifclass.
807 *
808 * note: CLASSQDQ_POLL returns the next packet without removing the packet
809 * from the queue. CLASSQDQ_REMOVE is a normal dequeue operation.
810 * CLASSQDQ_REMOVE must return the same packet if called immediately
811 * after CLASSQDQ_POLL.
812 */
cb323159 813static void
316670eb 814tcq_dequeue_tc_ifclassq(struct ifclassq *ifq, mbuf_svc_class_t sc,
cb323159 815 classq_pkt_t *cpkt)
316670eb 816{
5ba3f43e 817 pktsched_pkt_t pkt;
316670eb
A
818 u_int32_t i = MBUF_SCIDX(sc);
819
820 VERIFY((u_int32_t)i < IFCQ_SC_MAX);
821
cb323159 822 _PKTSCHED_PKT_INIT(&pkt);
5ba3f43e 823 (tcq_dequeue_cl(ifq->ifcq_disc, ifq->ifcq_disc_slots[i].cl, sc, &pkt));
cb323159 824 *cpkt = pkt.pktsched_pkt;
316670eb
A
825}
826
827static int
828tcq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg)
829{
0a7de745 830 struct tcq_if *tif = (struct tcq_if *)ifq->ifcq_disc;
316670eb
A
831 int err = 0;
832
833 IFCQ_LOCK_ASSERT_HELD(ifq);
834
835 switch (req) {
836 case CLASSQRQ_PURGE:
837 tcq_purge(tif);
838 break;
839
840 case CLASSQRQ_PURGE_SC:
841 tcq_purge_sc(tif, (cqrq_purge_sc_t *)arg);
842 break;
843
844 case CLASSQRQ_EVENT:
845 tcq_event(tif, (cqev_t)arg);
846 break;
847
848 case CLASSQRQ_THROTTLE:
849 err = tcq_throttle(tif, (cqrq_throttle_t *)arg);
850 break;
39236c6e
A
851
852 case CLASSQRQ_STAT_SC:
853 err = tcq_stat_sc(tif, (cqrq_stat_sc_t *)arg);
854 break;
316670eb 855 }
0a7de745 856 return err;
316670eb
A
857}
858
/*
 * Attach a TCQ scheduler to an ifclassq: create the four traffic-class
 * queues (BK, BE, VI, VO at priorities 0-3; BE is the default class;
 * BK and VI are lazily SFB-initialized), register the enqueue/dequeue/
 * request hooks, and cache the per-service-class slot-to-class mapping
 * used on the fast path.  On any failure the partially-built scheduler
 * is torn down and the error returned.
 */
int
tcq_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
    classq_pkt_type_t ptype)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	struct tcq_class *cl0, *cl1, *cl2, *cl3;
	struct tcq_if *tif;
	u_int32_t maxlen = 0, qflags = 0;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	/* translate generic scheduler flags into TCQ class flags */
	if (flags & PKTSCHEDF_QALG_SFB) {
		qflags |= TQCF_SFB;
	}
	if (flags & PKTSCHEDF_QALG_ECN) {
		qflags |= TQCF_ECN;
	}
	if (flags & PKTSCHEDF_QALG_FLOWCTL) {
		qflags |= TQCF_FLOWCTL;
	}
	if (flags & PKTSCHEDF_QALG_DELAYBASED) {
		qflags |= TQCF_DELAYBASED;
	}

	tif = tcq_alloc(ifp, M_WAITOK);
	if (tif == NULL) {
		return ENOMEM;
	}

	if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
		maxlen = if_sndq_maxlen;
	}

	if ((err = tcq_add_queue(tif, 0, maxlen,
	    qflags | TQCF_LAZY, SCIDX_BK, &cl0, ptype)) != 0) {
		goto cleanup;
	}

	if ((err = tcq_add_queue(tif, 1, maxlen,
	    qflags | TQCF_DEFAULTCLASS, SCIDX_BE, &cl1, ptype)) != 0) {
		goto cleanup;
	}

	if ((err = tcq_add_queue(tif, 2, maxlen,
	    qflags | TQCF_LAZY, SCIDX_VI, &cl2, ptype)) != 0) {
		goto cleanup;
	}

	if ((err = tcq_add_queue(tif, 3, maxlen,
	    qflags, SCIDX_VO, &cl3, ptype)) != 0) {
		goto cleanup;
	}

	err = ifclassq_attach(ifq, PKTSCHEDT_TCQ, tif,
	    tcq_enqueue_ifclassq, NULL, tcq_dequeue_tc_ifclassq,
	    NULL, NULL, tcq_request_ifclassq);

	/* cache these for faster lookup */
	if (err == 0) {
		/* Map {BK_SYS,BK} to TC_BK */
		ifq->ifcq_disc_slots[SCIDX_BK_SYS].qid = SCIDX_BK;
		ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl = cl0;

		ifq->ifcq_disc_slots[SCIDX_BK].qid = SCIDX_BK;
		ifq->ifcq_disc_slots[SCIDX_BK].cl = cl0;

		/* Map {BE,RD,OAM} to TC_BE */
		ifq->ifcq_disc_slots[SCIDX_BE].qid = SCIDX_BE;
		ifq->ifcq_disc_slots[SCIDX_BE].cl = cl1;

		ifq->ifcq_disc_slots[SCIDX_RD].qid = SCIDX_BE;
		ifq->ifcq_disc_slots[SCIDX_RD].cl = cl1;

		ifq->ifcq_disc_slots[SCIDX_OAM].qid = SCIDX_BE;
		ifq->ifcq_disc_slots[SCIDX_OAM].cl = cl1;

		/* Map {AV,RV,VI} to TC_VI */
		ifq->ifcq_disc_slots[SCIDX_AV].qid = SCIDX_VI;
		ifq->ifcq_disc_slots[SCIDX_AV].cl = cl2;

		ifq->ifcq_disc_slots[SCIDX_RV].qid = SCIDX_VI;
		ifq->ifcq_disc_slots[SCIDX_RV].cl = cl2;

		ifq->ifcq_disc_slots[SCIDX_VI].qid = SCIDX_VI;
		ifq->ifcq_disc_slots[SCIDX_VI].cl = cl2;

		/* Map {VO,CTL} to TC_VO */
		ifq->ifcq_disc_slots[SCIDX_VO].qid = SCIDX_VO;
		ifq->ifcq_disc_slots[SCIDX_VO].cl = cl3;

		ifq->ifcq_disc_slots[SCIDX_CTL].qid = SCIDX_VO;
		ifq->ifcq_disc_slots[SCIDX_CTL].cl = cl3;
	}

cleanup:
	if (err != 0) {
		(void) tcq_destroy_locked(tif);
	}

	return err;
}
963
964int
965tcq_teardown_ifclassq(struct ifclassq *ifq)
966{
967 struct tcq_if *tif = ifq->ifcq_disc;
968 int i;
969
970 IFCQ_LOCK_ASSERT_HELD(ifq);
971 VERIFY(tif != NULL && ifq->ifcq_type == PKTSCHEDT_TCQ);
972
973 (void) tcq_destroy_locked(tif);
974
975 ifq->ifcq_disc = NULL;
976 for (i = 0; i < IFCQ_SC_MAX; i++) {
977 ifq->ifcq_disc_slots[i].qid = 0;
978 ifq->ifcq_disc_slots[i].cl = NULL;
979 }
980
0a7de745 981 return ifclassq_detach(ifq);
316670eb
A
982}
983
984int
985tcq_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t slot,
986 struct if_ifclassq_stats *ifqs)
987{
988 struct tcq_if *tif = ifq->ifcq_disc;
989
990 IFCQ_LOCK_ASSERT_HELD(ifq);
991 VERIFY(ifq->ifcq_type == PKTSCHEDT_TCQ);
992
0a7de745
A
993 if (slot >= IFCQ_SC_MAX) {
994 return EINVAL;
995 }
316670eb 996
0a7de745
A
997 return tcq_get_class_stats(tif, ifq->ifcq_disc_slots[slot].qid,
998 &ifqs->ifqs_tcq_stats);
316670eb
A
999}
1000
/*
 * Query (tr->set == 0) or change the interface throttling level.
 * Throttling currently only suspends/resumes the BK_SYS class.
 * ENXIO from tcq_suspendq() means the class's SFB is lazily
 * allocated and the level is merely recorded ("lazy" throttling,
 * applied when SFB is created); any other error leaves the level
 * unchanged.  Returns EALREADY if the requested level is already set.
 */
static int
tcq_throttle(struct tcq_if *tif, cqrq_throttle_t *tr)
{
	struct ifclassq *ifq = tif->tif_ifq;
	struct tcq_class *cl;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!tr->set) {
		/* query only: report the current level */
		tr->level = tif->tif_throttle;
		return 0;
	}

	if (tr->level == tif->tif_throttle) {
		return EALREADY;
	}

	/* Current throttling levels only involve BK_SYS class */
	cl = ifq->ifcq_disc_slots[SCIDX_BK_SYS].cl;

	switch (tr->level) {
	case IFNET_THROTTLE_OFF:
		err = tcq_resumeq(tif, cl);
		break;

	case IFNET_THROTTLE_OPPORTUNISTIC:
		err = tcq_suspendq(tif, cl);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	if (err == 0 || err == ENXIO) {
		if (pktsched_verbose) {
			log(LOG_DEBUG, "%s: %s throttling %slevel set %d->%d\n",
			    if_name(TCQIF_IFP(tif)), tcq_style(tif),
			    (err == 0) ? "" : "lazy ", tif->tif_throttle,
			    tr->level);
		}
		tif->tif_throttle = tr->level;
		if (err != 0) {
			/* ENXIO (lazy) is not an error to the caller */
			err = 0;
		} else {
			/* drop whatever was already queued in BK_SYS */
			tcq_purgeq(tif, cl, 0, NULL, NULL);
		}
	} else {
		log(LOG_ERR, "%s: %s unable to set throttling level "
		    "%d->%d [error=%d]\n", if_name(TCQIF_IFP(tif)),
		    tcq_style(tif), tif->tif_throttle, tr->level, err);
	}

	return err;
}
1057
1058static int
1059tcq_resumeq(struct tcq_if *tif, struct tcq_class *cl)
1060{
1061 struct ifclassq *ifq = tif->tif_ifq;
1062 int err = 0;
5ba3f43e
A
1063#if !MACH_ASSERT
1064#pragma unused(ifq)
1065#endif
316670eb
A
1066 IFCQ_LOCK_ASSERT_HELD(ifq);
1067
0a7de745 1068 if (q_is_sfb(&cl->cl_q) && cl->cl_sfb != NULL) {
316670eb 1069 err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, FALSE);
0a7de745 1070 }
316670eb 1071
0a7de745 1072 if (err == 0) {
316670eb 1073 qstate(&cl->cl_q) = QS_RUNNING;
0a7de745 1074 }
316670eb 1075
0a7de745 1076 return err;
316670eb
A
1077}
1078
1079static int
1080tcq_suspendq(struct tcq_if *tif, struct tcq_class *cl)
1081{
1082 struct ifclassq *ifq = tif->tif_ifq;
1083 int err = 0;
5ba3f43e
A
1084#if !MACH_ASSERT
1085#pragma unused(ifq)
1086#endif
316670eb
A
1087 IFCQ_LOCK_ASSERT_HELD(ifq);
1088
316670eb
A
1089 if (q_is_sfb(&cl->cl_q)) {
1090 if (cl->cl_sfb != NULL) {
1091 err = sfb_suspendq(cl->cl_sfb, &cl->cl_q, TRUE);
1092 } else {
1093 VERIFY(cl->cl_flags & TQCF_LAZY);
0a7de745 1094 err = ENXIO; /* delayed throttling */
316670eb
A
1095 }
1096 }
1097
0a7de745 1098 if (err == 0 || err == ENXIO) {
316670eb 1099 qstate(&cl->cl_q) = QS_SUSPENDED;
0a7de745 1100 }
316670eb 1101
0a7de745 1102 return err;
316670eb 1103}