bsd/net/pktsched/pktsched_fq_codel.c (apple/xnu, tag xnu-3789.1.32)
/*
 * Copyright (c) 2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/types.h>
#include <sys/param.h>
#include <kern/zalloc.h>
#include <net/if_var.h>
#include <net/if.h>
#include <net/classq/classq.h>
#include <net/classq/classq_fq_codel.h>
#include <net/pktsched/pktsched_fq_codel.h>

static size_t fq_if_size;
static struct zone *fq_if_zone;

static fq_if_t *fq_if_alloc(struct ifnet *ifp, int how);
static void fq_if_destroy(fq_if_t *fqs);
static void fq_if_classq_init(fq_if_t *fqs, u_int32_t priority,
    u_int32_t quantum, u_int32_t drr_max, u_int32_t svc_class);
static int fq_if_enqueue_classq(struct ifclassq *ifq, struct mbuf *m);
static struct mbuf *fq_if_dequeue_classq(struct ifclassq *ifq, cqdq_op_t);
static int fq_if_dequeue_classq_multi(struct ifclassq *, cqdq_op_t,
    u_int32_t, u_int32_t, struct mbuf **, struct mbuf **, u_int32_t *,
    u_int32_t *);
static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, u_int32_t,
    u_int32_t, struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static int fq_if_request_classq(struct ifclassq *ifq, cqrq_t op, void *arg);
void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
static void fq_if_purge(fq_if_t *);
static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
static void fq_if_purge_flow(fq_if_t *, fq_t *, u_int32_t *, u_int32_t *);
static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl,
    bool add_to_old);
static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
    fq_t *fq, bool remove_hash);
static void fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
    fq_t *fq);

#define FQ_IF_ZONE_MAX 32 /* Maximum elements in zone */
#define FQ_IF_ZONE_NAME "pktsched_fq_if" /* zone for fq_if class */

#define FQ_IF_FLOW_HASH_ID(_flowid_) \
    (((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)

#define FQ_IF_CLASSQ_IDLE(_fcl_) \
    (STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
    STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))

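/*
 * fq_codel_scheduler_init: one-time module initialization.  Sets up the
 * zone for flow queue structures (via fq_codel_init()) and the zone that
 * backs fq_if_t scheduler instances; panics if the zone cannot be created.
 */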
void
fq_codel_scheduler_init(void)
{
    /* Initialize the zone for flow queue structures */
    fq_codel_init();

    fq_if_size = sizeof (fq_if_t);
    fq_if_zone = zinit(fq_if_size, (FQ_IF_ZONE_MAX * fq_if_size), 0,
        FQ_IF_ZONE_NAME);
    if (fq_if_zone == NULL) {
        panic("%s: failed allocating from %s", __func__,
            (FQ_IF_ZONE_NAME));
    }
    zone_change(fq_if_zone, Z_EXPAND, TRUE);
    zone_change(fq_if_zone, Z_CALLERACCT, TRUE);
}

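/*
 * fq_if_alloc: allocate and zero a scheduler instance for an interface,
 * and compute its target queue delay and update interval.  M_WAITOK
 * allows the zone allocator to block; any other value uses the
 * non-blocking path and may return NULL.
 */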
fq_if_t *
fq_if_alloc(struct ifnet *ifp, int how)
{
    fq_if_t *fqs;
    fqs = (how == M_WAITOK) ? zalloc(fq_if_zone) :
        zalloc_noblock(fq_if_zone);
    if (fqs == NULL)
        return (NULL);

    bzero(fqs, fq_if_size);
    fqs->fqs_ifq = &ifp->if_snd;

    /* Calculate target queue delay */
    ifclassq_calc_target_qdelay(ifp, &fqs->fqs_target_qdelay);

    /* Calculate update interval */
    ifclassq_calc_update_interval(&fqs->fqs_update_interval);
    fqs->fqs_pkt_droplimit = FQ_IF_MAX_PKT_LIMIT;
    STAILQ_INIT(&fqs->fqs_fclist);
    return (fqs);
}

void
fq_if_destroy(fq_if_t *fqs)
{
    IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
    fq_if_purge(fqs);
    fqs->fqs_ifq = NULL;
    zfree(fq_if_zone, fqs);
}

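/*
 * fq_if_service_to_priority: map an mbuf service class to the index of
 * the scheduler class queue that serves it.  Unknown classes fall back
 * to best effort.
 */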
static inline u_int32_t
fq_if_service_to_priority(mbuf_svc_class_t svc)
{
    u_int32_t pri;

    switch (svc) {
    case MBUF_SC_BK_SYS:
        pri = FQ_IF_BK_SYS_INDEX;
        break;
    case MBUF_SC_BK:
        pri = FQ_IF_BK_INDEX;
        break;
    case MBUF_SC_BE:
        pri = FQ_IF_BE_INDEX;
        break;
    case MBUF_SC_RD:
        pri = FQ_IF_RD_INDEX;
        break;
    case MBUF_SC_OAM:
        pri = FQ_IF_OAM_INDEX;
        break;
    case MBUF_SC_AV:
        pri = FQ_IF_AV_INDEX;
        break;
    case MBUF_SC_RV:
        pri = FQ_IF_RV_INDEX;
        break;
    case MBUF_SC_VI:
        pri = FQ_IF_VI_INDEX;
        break;
    case MBUF_SC_VO:
        pri = FQ_IF_VO_INDEX;
        break;
    case MBUF_SC_CTL:
        pri = FQ_IF_CTL_INDEX;
        break;
    default:
        pri = FQ_IF_BE_INDEX; /* Use best effort by default */
        break;
    }
    return (pri);
}

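/*
 * fq_if_classq_init: initialize one class queue with its DRR quantum,
 * the maximum number of flows serviced per round (drr_max) and the
 * service class it represents.
 */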
void
fq_if_classq_init(fq_if_t *fqs, u_int32_t pri, u_int32_t quantum,
    u_int32_t drr_max, u_int32_t svc_class)
{
    fq_if_classq_t *fq_cl;

    fq_cl = &fqs->fqs_classq[pri];

    VERIFY(pri >= 0 && pri < FQ_IF_MAX_CLASSES &&
        fq_cl->fcl_quantum == 0);
    fq_cl->fcl_quantum = quantum;
    fq_cl->fcl_pri = pri;
    fq_cl->fcl_drr_max = drr_max;
    fq_cl->fcl_service_class = svc_class;
    STAILQ_INIT(&fq_cl->fcl_new_flows);
    STAILQ_INIT(&fq_cl->fcl_old_flows);
}

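/*
 * fq_if_enqueue_classq: classify the packet by its service class, hand
 * it to the flow queue layer (fq_addq) and update the class state
 * bitmaps.  Returns 0 on success, or an errno describing the drop or
 * flow-control outcome.
 */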
int
fq_if_enqueue_classq(struct ifclassq *ifq, struct mbuf *m)
{
    u_int32_t pri;
    fq_if_t *fqs;
    fq_if_classq_t *fq_cl;
    int ret, len;
    mbuf_svc_class_t svc;

    IFCQ_LOCK_ASSERT_HELD(ifq);
    if (!(m->m_flags & M_PKTHDR)) {
        IFCQ_CONVERT_LOCK(ifq);
        m_freem(m);
        return (ENOBUFS);
    }

    fqs = (fq_if_t *)ifq->ifcq_disc;
    svc = mbuf_get_service_class(m);
    pri = fq_if_service_to_priority(svc);
    VERIFY(pri >= 0 && pri < FQ_IF_MAX_CLASSES);
    fq_cl = &fqs->fqs_classq[pri];

    if (svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1) {
        /* BK_SYS is currently throttled */
        fq_cl->fcl_stat.fcl_throttle_drops++;
        IFCQ_CONVERT_LOCK(ifq);
        m_freem(m);
        return (EQSUSPENDED);
    }

    len = m_length(m);
    ret = fq_addq(fqs, m, fq_cl);
    if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
        if (((fqs->fqs_bitmaps[FQ_IF_ER] | fqs->fqs_bitmaps[FQ_IF_EB]) &
            (1 << pri)) == 0) {
            /*
             * this group is not in ER or EB groups,
             * mark it as IB
             */
            pktsched_bit_set(pri, &fqs->fqs_bitmaps[FQ_IF_IB]);
        }
    }

    if (ret != 0) {
        if (ret == CLASSQEQ_SUCCESS_FC) {
            /* packet enqueued, return advisory feedback */
            ret = EQFULL;
        } else {
            VERIFY(ret == CLASSQEQ_DROPPED ||
                ret == CLASSQEQ_DROPPED_FC ||
                ret == CLASSQEQ_DROPPED_SP);
            switch (ret) {
            case CLASSQEQ_DROPPED:
                return (ENOBUFS);
            case CLASSQEQ_DROPPED_FC:
                return (EQFULL);
            case CLASSQEQ_DROPPED_SP:
                return (EQSUSPENDED);
            }
        }
    }
    IFCQ_INC_LEN(ifq);
    IFCQ_INC_BYTES(ifq, len);
    return (ret);
}

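/*
 * fq_if_dequeue_classq: dequeue a single packet; a thin wrapper around
 * fq_if_dequeue_classq_multi with a packet limit of one.
 */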
struct mbuf *
fq_if_dequeue_classq(struct ifclassq *ifq, cqdq_op_t op)
{
    struct mbuf *top;

    (void) fq_if_dequeue_classq_multi(ifq, op, 1,
        CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &top, NULL, NULL, NULL);

    return (top);
}

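/*
 * fq_if_dequeue_classq_multi: dequeue up to maxpktcnt packets or
 * maxbytecnt bytes as a packet chain.  Classes are serviced in priority
 * order through the ER/EB/IB state bitmaps: when both ER and EB are
 * empty, IB classes are promoted to EB; the highest priority EB class is
 * then moved to ER, and each ER class dequeues within its DRR budget
 * before being demoted back to IB or cleared.
 */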
int
fq_if_dequeue_classq_multi(struct ifclassq *ifq, cqdq_op_t op,
    u_int32_t maxpktcnt, u_int32_t maxbytecnt, struct mbuf **first_packet,
    struct mbuf **last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt)
{
#pragma unused(op)
    struct mbuf *top = NULL, *tail = NULL, *first, *last;
    u_int32_t pktcnt = 0, bytecnt = 0, total_pktcnt, total_bytecnt;
    fq_if_t *fqs;
    fq_if_classq_t *fq_cl;
    int pri;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    fqs = (fq_if_t *)ifq->ifcq_disc;

    first = last = NULL;
    total_pktcnt = total_bytecnt = 0;

    for (;;) {
        if (fqs->fqs_bitmaps[FQ_IF_ER] == 0 &&
            fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
            fqs->fqs_bitmaps[FQ_IF_EB] = fqs->fqs_bitmaps[FQ_IF_IB];
            fqs->fqs_bitmaps[FQ_IF_IB] = 0;
            if (fqs->fqs_bitmaps[FQ_IF_EB] == 0)
                break;
        }
        pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_ER]);
        if (pri == 0) {
            /*
             * There are no ER flows, move the highest
             * priority one from EB if there are any in that
             * category
             */
            pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_EB]);
            VERIFY(pri > 0);
            pktsched_bit_clr((pri - 1),
                &fqs->fqs_bitmaps[FQ_IF_EB]);
            pktsched_bit_set((pri - 1),
                &fqs->fqs_bitmaps[FQ_IF_ER]);
        }
        pri--; /* index starts at 0 */
        fq_cl = &fqs->fqs_classq[pri];

        if (fq_cl->fcl_budget <= 0) {
            /* Update the budget */
            fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
                fq_cl->fcl_stat.fcl_flows_cnt) *
                fq_cl->fcl_quantum);
            if (fq_cl->fcl_budget <= 0)
                goto state_change;
        }
        fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
            (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt,
            &bytecnt);
        if (top != NULL) {
            VERIFY(pktcnt > 0 && bytecnt > 0);
            if (first == NULL) {
                first = top;
                last = tail;
                total_pktcnt = pktcnt;
                total_bytecnt = bytecnt;
            } else {
                last->m_nextpkt = top;
                last = tail;
                total_pktcnt += pktcnt;
                total_bytecnt += bytecnt;
            }
            last->m_nextpkt = NULL;
            fq_cl->fcl_budget -= bytecnt;
            pktcnt = 0;
            bytecnt = 0;
        }

        /*
         * If the class has exceeded the budget but still has data
         * to send, move it to IB
         */
state_change:
        if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
            if (fq_cl->fcl_budget <= 0) {
                pktsched_bit_set(pri,
                    &fqs->fqs_bitmaps[FQ_IF_IB]);
                pktsched_bit_clr(pri,
                    &fqs->fqs_bitmaps[FQ_IF_ER]);
            }
        } else {
            pktsched_bit_clr(pri, &fqs->fqs_bitmaps[FQ_IF_ER]);
            VERIFY(((fqs->fqs_bitmaps[FQ_IF_ER] |
                fqs->fqs_bitmaps[FQ_IF_EB] |
                fqs->fqs_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
            fq_cl->fcl_budget = 0;
        }
        if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt)
            break;
    }
    if (first != NULL) {
        if (first_packet != NULL)
            *first_packet = first;
        if (last_packet != NULL)
            *last_packet = last;
        if (retpktcnt != NULL)
            *retpktcnt = total_pktcnt;
        if (retbytecnt != NULL)
            *retbytecnt = total_bytecnt;
        IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
    } else {
        if (first_packet != NULL)
            *first_packet = NULL;
        if (last_packet != NULL)
            *last_packet = NULL;
        if (retpktcnt != NULL)
            *retpktcnt = 0;
        if (retbytecnt != NULL)
            *retbytecnt = 0;
    }
    return (0);
}

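/*
 * fq_if_purge_flow: free every packet queued on a flow, detach the flow
 * from its new/old flow list and destroy it.  Optionally reports the
 * number of packets and bytes purged.
 */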
static void
fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, u_int32_t *pktsp,
    u_int32_t *bytesp)
{
    fq_if_classq_t *fq_cl;
    u_int32_t pkts, bytes;
    struct mbuf *m;

    fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
    pkts = bytes = 0;
    while ((m = fq_getq_flow(fqs, fq)) != NULL) {
        pkts++;
        bytes += m_length(m);
        m_freem(m);
        m = NULL;
    }
    IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);

    if (fq->fq_flags & FQF_NEW_FLOW) {
        fq_if_empty_new_flow(fq, fq_cl, false);
    } else if (fq->fq_flags & FQF_OLD_FLOW) {
        fq_if_empty_old_flow(fqs, fq_cl, fq, false);
    }

    fq_if_destroy_flow(fqs, fq_cl, fq);

    if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
        int i;
        for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
            pktsched_bit_clr(fq_cl->fcl_pri,
                &fqs->fqs_bitmaps[i]);
        }
    }
    if (pktsp != NULL)
        *pktsp = pkts;
    if (bytesp != NULL)
        *bytesp = bytes;
}

static void
fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
    fq_t *fq, *tfq;
    /*
     * Take each flow from new/old flow list and flush mbufs
     * in that flow
     */
    STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
        fq_if_purge_flow(fqs, fq, NULL, NULL);
    }
    STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
        fq_if_purge_flow(fqs, fq, NULL, NULL);
    }
    VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
    VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));

    STAILQ_INIT(&fq_cl->fcl_new_flows);
    STAILQ_INIT(&fq_cl->fcl_old_flows);
    fq_cl->fcl_budget = 0;
}

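/*
 * fq_if_purge: flush every class queue and reset the scheduler state
 * bitmaps and the interface queue counters.
 */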
static void
fq_if_purge(fq_if_t *fqs)
{
    int i;

    IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
    for (i = 0; i < FQ_IF_MAX_CLASSES; i++) {
        fq_if_purge_classq(fqs, &fqs->fqs_classq[i]);
    }

    VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));

    fqs->fqs_large_flow = NULL;
    for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) {
        VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i]));
    }

    bzero(&fqs->fqs_bitmaps, sizeof (fqs->fqs_bitmaps));

    IFCQ_LEN(fqs->fqs_ifq) = 0;
    IFCQ_BYTES(fqs->fqs_ifq) = 0;
}

static void
fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
{
    fq_t *fq;

    IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
    req->packets = req->bytes = 0;
    VERIFY(req->flow != 0);

    fq = fq_if_hash_pkt(fqs, req->flow, req->sc, 0, FALSE);

    if (fq != NULL)
        fq_if_purge_flow(fqs, fq, &req->packets, &req->bytes);
}

static void
fq_if_event(fq_if_t *fqs, cqev_t ev)
{
    IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);

    switch (ev) {
    case CLASSQ_EV_LINK_UP:
    case CLASSQ_EV_LINK_DOWN:
        fq_if_purge(fqs);
        break;
    default:
        break;
    }
}

static void
fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
    fq_if_purge_classq(fqs, fq_cl);
    fqs->fqs_throttle = 1;
    fq_cl->fcl_stat.fcl_throttle_on++;
}

static void
fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
    VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
    fqs->fqs_throttle = 0;
    fq_cl->fcl_stat.fcl_throttle_off++;
}

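/*
 * fq_if_throttle: get or set the interface throttling level.  Only the
 * background system (BK_SYS) class can be throttled; suspending it
 * purges the packets it has queued.
 */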
513 fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
514 {
515 struct ifclassq *ifq = fqs->fqs_ifq;
516 int index;
517
518 IFCQ_LOCK_ASSERT_HELD(ifq);
519
520 if (!tr->set) {
521 tr->level = fqs->fqs_throttle;
522 return (0);
523 }
524
525 if (tr->level == fqs->fqs_throttle)
526 return (EALREADY);
527
528 /* Throttling is allowed on BK_SYS class only */
529 index = fq_if_service_to_priority(MBUF_SC_BK_SYS);
530 switch (tr->level) {
531 case IFNET_THROTTLE_OFF:
532 fq_if_classq_resume(fqs, &fqs->fqs_classq[index]);
533 break;
534 case IFNET_THROTTLE_OPPORTUNISTIC:
535 fq_if_classq_suspend(fqs, &fqs->fqs_classq[index]);
536 break;
537 default:
538 break;
539 }
540 return (0);
541 }
542
void
fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
{
    u_int32_t pri;
    fq_if_classq_t *fq_cl;

    if (stat == NULL)
        return;

    pri = fq_if_service_to_priority(stat->sc);
    fq_cl = &fqs->fqs_classq[pri];
    stat->packets = fq_cl->fcl_stat.fcl_pkt_cnt;
    stat->bytes = fq_cl->fcl_stat.fcl_byte_cnt;
}

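/*
 * fq_if_request_classq: dispatch a classq control request (purge, event,
 * throttle or per-service-class stats) to the matching handler.
 */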
int
fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
{
    int err = 0;
    fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

    IFCQ_LOCK_ASSERT_HELD(ifq);

    /*
     * These are usually slow operations, convert the lock ahead of time
     */
    IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
    switch (rq) {
    case CLASSQRQ_PURGE:
        fq_if_purge(fqs);
        break;
    case CLASSQRQ_PURGE_SC:
        fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg);
        break;
    case CLASSQRQ_EVENT:
        fq_if_event(fqs, (cqev_t)arg);
        break;
    case CLASSQRQ_THROTTLE:
        fq_if_throttle(fqs, (cqrq_throttle_t *)arg);
        break;
    case CLASSQRQ_STAT_SC:
        fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg);
        break;
    }
    return (err);
}

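/*
 * fq_if_setup_ifclassq: allocate a scheduler instance, initialize each
 * of the ten class queues with a quantum and DRR limit suited to its
 * service class, and attach the instance to the interface send queue.
 */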
int
fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
{
#pragma unused(flags)
    struct ifnet *ifp = ifq->ifcq_ifp;
    fq_if_t *fqs = NULL;
    int err = 0;

    IFCQ_LOCK_ASSERT_HELD(ifq);
    VERIFY(ifq->ifcq_disc == NULL);
    VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

    fqs = fq_if_alloc(ifp, M_WAITOK);
    if (fqs == NULL)
        return (ENOMEM);

    fq_if_classq_init(fqs, FQ_IF_BK_SYS_INDEX, 1500, 2, MBUF_SC_BK_SYS);
    fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500, 2, MBUF_SC_BK);
    fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500, 4, MBUF_SC_BE);
    fq_if_classq_init(fqs, FQ_IF_RD_INDEX, 1500, 4, MBUF_SC_RD);
    fq_if_classq_init(fqs, FQ_IF_OAM_INDEX, 1500, 4, MBUF_SC_OAM);
    fq_if_classq_init(fqs, FQ_IF_AV_INDEX, 3000, 6, MBUF_SC_AV);
    fq_if_classq_init(fqs, FQ_IF_RV_INDEX, 3000, 6, MBUF_SC_RV);
    fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000, 6, MBUF_SC_VI);
    fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600, 8, MBUF_SC_VO);
    fq_if_classq_init(fqs, FQ_IF_CTL_INDEX, 600, 8, MBUF_SC_CTL);

    err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs,
        fq_if_enqueue_classq, fq_if_dequeue_classq, NULL,
        fq_if_dequeue_classq_multi, fq_if_request_classq);

    if (err != 0) {
        printf("%s: error from ifclassq_attach, "
            "failed to attach fq_if: %d\n", __func__, err);
        fq_if_destroy(fqs);
    }
    return (err);
}

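/*
 * fq_if_hash_pkt: look up the flow queue for a flow id and service class
 * pair; when 'create' is set, allocate and link a new flow queue if none
 * exists.  May return NULL.
 */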
fq_t *
fq_if_hash_pkt(fq_if_t *fqs, u_int32_t flowid, mbuf_svc_class_t svc_class,
    u_int64_t now, boolean_t create)
{
    fq_t *fq = NULL;
    flowq_list_t *fq_list;
    fq_if_classq_t *fq_cl;
    u_int8_t fqs_hash_id;
    u_int8_t scidx;

    scidx = fq_if_service_to_priority(svc_class);

    fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);

    fq_list = &fqs->fqs_flows[fqs_hash_id];

    SLIST_FOREACH(fq, fq_list, fq_hashlink) {
        if (fq->fq_flowhash == flowid &&
            fq->fq_sc_index == scidx)
            break;
    }
    if (fq == NULL && create == TRUE) {
        /* If the flow is not already on the list, allocate it */
        IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
        fq = fq_alloc(M_WAITOK);
        if (fq != NULL) {
            fq->fq_flowhash = flowid;
            fq->fq_sc_index = scidx;
            fq->fq_updatetime = now + fqs->fqs_update_interval;
            fq_cl = &fqs->fqs_classq[scidx];

            fq->fq_flags = FQF_FLOWCTL_CAPABLE;
            SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
            fq_cl->fcl_stat.fcl_flows_cnt++;
        }
    }

    /*
     * If getq time is not set because this is the first packet or after
     * idle time, set it now so that we can detect a stall.  The flow may
     * be NULL here if the lookup failed or the allocation did not succeed.
     */
    if (fq != NULL && fq->fq_getqtime == 0)
        fq->fq_getqtime = now;

    return (fq);
}

static void
fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq)
{
    u_int8_t hash_id;
    hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
    SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq,
        fq_hashlink);
    fq_cl->fcl_stat.fcl_flows_cnt--;
    IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
    fq_destroy(fq);
}

inline boolean_t
fq_if_at_drop_limit(fq_if_t *fqs)
{
    return (((IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
        TRUE : FALSE));
}

static void
fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    bool remove_hash)
{
    /*
     * Remove the empty flow queue from the old flows list; if
     * remove_hash is set, also unlink it from the hash table and
     * free it.
     */
    STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq,
        fq_actlink);
    fq->fq_flags &= ~FQF_OLD_FLOW;
    fq_cl->fcl_stat.fcl_oldflows_cnt--;
    VERIFY(fq->fq_bytes == 0);

    if (remove_hash) {
        /* Remove from the hash list */
        fq_if_destroy_flow(fqs, fq_cl, fq);
    }
}

static void
fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl, bool add_to_old)
{
    /* Move to the end of old queue list */
    STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
        flowq, fq_actlink);
    fq->fq_flags &= ~FQF_NEW_FLOW;
    fq_cl->fcl_stat.fcl_newflows_cnt--;

    if (add_to_old) {
        STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq,
            fq_actlink);
        fq->fq_flags |= FQF_OLD_FLOW;
        fq_cl->fcl_stat.fcl_oldflows_cnt++;
    }
}

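/*
 * fq_if_drop_packet: drop the packet at the head of the largest flow;
 * used when the scheduler has reached its packet drop limit.
 */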
inline void
fq_if_drop_packet(fq_if_t *fqs)
{
    fq_t *fq = fqs->fqs_large_flow;
    struct mbuf *m;
    fq_if_classq_t *fq_cl;

    if (fq == NULL)
        return;
    /* mbufq can not be empty on the largest flow */
    VERIFY(!MBUFQ_EMPTY(&fq->fq_mbufq));

    fq_cl = &fqs->fqs_classq[fq->fq_sc_index];

    m = fq_getq_flow(fqs, fq);

    IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
    if (MBUFQ_EMPTY(&fq->fq_mbufq)) {
        if (fq->fq_flags & FQF_OLD_FLOW) {
            fq_if_empty_old_flow(fqs, fq_cl, fq, true);
        } else {
            VERIFY(fq->fq_flags & FQF_NEW_FLOW);
            fq_if_empty_new_flow(fq, fq_cl, true);
        }
    }
    IFCQ_DROP_ADD(fqs->fqs_ifq, 1, m_length(m));

    m_freem(m);
    fq_cl->fcl_stat.fcl_drop_overflow++;
}

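/*
 * fq_if_is_flow_heavy: remember the flow with the most queued bytes so
 * that fq_if_drop_packet can penalize it when the queue overflows.
 */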
inline void
fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
{
    fq_t *prev_fq = fqs->fqs_large_flow;

    if (prev_fq == NULL) {
        if (!MBUFQ_EMPTY(&fq->fq_mbufq))
            fqs->fqs_large_flow = fq;
        return;
    }
    if (fq->fq_bytes > prev_fq->fq_bytes)
        fqs->fqs_large_flow = fq;
}

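/*
 * fq_if_add_fcentry: add a flow advisory entry for a flow-controlled
 * flow, unless one is already present.  Returns TRUE if the flow is on
 * the flow control list when we return.
 */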
boolean_t
fq_if_add_fcentry(fq_if_t *fqs, struct pkthdr *pkt, fq_if_classq_t *fq_cl)
{
    struct flowadv_fcentry *fce;
    u_int32_t flowsrc, flowid;

    flowsrc = pkt->pkt_flowsrc;
    flowid = pkt->pkt_flowid;

    STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
        if (fce->fce_flowsrc == flowsrc &&
            fce->fce_flowid == flowid) {
            /* Already on flowcontrol list */
            return (TRUE);
        }
    }

    IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
    fce = flowadv_alloc_entry(M_WAITOK);
    if (fce != NULL) {
        fce->fce_flowsrc = flowsrc;
        fce->fce_flowid = flowid;
        /* XXX Add number of bytes in the queue */
        STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
        fq_cl->fcl_stat.fcl_flow_control++;
    }
    return ((fce != NULL) ? TRUE : FALSE);
}

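/*
 * fq_if_flow_feedback: lift flow control for a flow by removing its
 * advisory entry from the flow control list and handing it back to the
 * flow advisory module; clears FQF_FLOWCTL_ON on the flow.
 */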
void
fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
{
    struct flowadv_fcentry *fce = NULL;

    IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
    STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
        if (fce->fce_flowid == fq->fq_flowhash)
            break;
    }
    if (fce != NULL) {
        STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry,
            fce_link);
        STAILQ_NEXT(fce, fce_link) = NULL;
        flowadv_add_entry(fce);
        fq_cl->fcl_stat.fcl_flow_feedback++;
    }
    fq->fq_flags &= ~FQF_FLOWCTL_ON;
}

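/*
 * fq_if_dequeue: DRR dequeue within a single class.  New flows are
 * serviced first; a new flow that empties or exhausts its deficit is
 * moved to the old flows list, and an old flow that exhausts its deficit
 * is rotated to the tail of that list with a replenished quantum.
 */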
void
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, u_int32_t pktlimit,
    u_int32_t bytelimit, struct mbuf **top, struct mbuf **tail,
    u_int32_t *retpktcnt, u_int32_t *retbytecnt)
{
    fq_t *fq = NULL, *tfq = NULL;
    struct mbuf *m = NULL, *last = NULL;
    flowq_stailq_t temp_stailq;
    u_int32_t pktcnt, bytecnt, mlen;
    boolean_t limit_reached = FALSE;

    /*
     * maximum byte limit should not be greater than the budget for
     * this class
     */
    if ((int32_t)bytelimit > fq_cl->fcl_budget)
        bytelimit = fq_cl->fcl_budget;

    VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);

    *top = NULL;
    pktcnt = bytecnt = 0;
    STAILQ_INIT(&temp_stailq);

    STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
        VERIFY((fq->fq_flags & (FQF_NEW_FLOW|FQF_OLD_FLOW)) ==
            FQF_NEW_FLOW);
        while (fq->fq_deficit > 0 && limit_reached == FALSE &&
            !MBUFQ_EMPTY(&fq->fq_mbufq)) {
            m = fq_getq_flow(fqs, fq);
            m->m_pkthdr.pkt_flags |= PKTF_NEW_FLOW;
            mlen = m_length(m);
            fq->fq_deficit -= mlen;

            if (*top == NULL) {
                *top = m;
            } else {
                last->m_nextpkt = m;
            }
            last = m;
            last->m_nextpkt = NULL;
            fq_cl->fcl_stat.fcl_dequeue++;
            fq_cl->fcl_stat.fcl_dequeue_bytes += mlen;

            pktcnt++;
            bytecnt += mlen;

            /* Check if the limit is reached */
            if (pktcnt >= pktlimit || bytecnt >= bytelimit)
                limit_reached = TRUE;
        }

        if (fq->fq_deficit <= 0 || MBUFQ_EMPTY(&fq->fq_mbufq)) {
            fq_if_empty_new_flow(fq, fq_cl, true);
            fq->fq_deficit += fq_cl->fcl_quantum;
        }
        if (limit_reached == TRUE)
            goto done;
    }

    STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
        VERIFY((fq->fq_flags & (FQF_NEW_FLOW|FQF_OLD_FLOW)) ==
            FQF_OLD_FLOW);
        while (fq->fq_deficit > 0 && !MBUFQ_EMPTY(&fq->fq_mbufq) &&
            limit_reached == FALSE) {
            m = fq_getq_flow(fqs, fq);
            mlen = m_length(m);
            fq->fq_deficit -= mlen;
            if (*top == NULL) {
                *top = m;
            } else {
                last->m_nextpkt = m;
            }
            last = m;
            last->m_nextpkt = NULL;
            fq_cl->fcl_stat.fcl_dequeue++;
            fq_cl->fcl_stat.fcl_dequeue_bytes += mlen;

            pktcnt++;
            bytecnt += mlen;

            /* Check if the limit is reached */
            if (pktcnt >= pktlimit || bytecnt >= bytelimit)
                limit_reached = TRUE;
        }

        if (MBUFQ_EMPTY(&fq->fq_mbufq)) {
            fq_if_empty_old_flow(fqs, fq_cl, fq, true);
        } else if (fq->fq_deficit <= 0) {
            STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
                flowq, fq_actlink);
            /*
             * Move to the end of the old queues list. We do not
             * need to update the flow count since this flow
             * will be added to the tail again
             */
            STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
            fq->fq_deficit += fq_cl->fcl_quantum;
        }

        if (limit_reached == TRUE)
            break;
    }

done:
    if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
        STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
    } else if (!STAILQ_EMPTY(&temp_stailq)) {
        fq_cl->fcl_old_flows = temp_stailq;
    }

    if (last != NULL) {
        VERIFY(*top != NULL);
        if (tail != NULL)
            *tail = last;
        if (retpktcnt != NULL)
            *retpktcnt = pktcnt;
        if (retbytecnt != NULL)
            *retbytecnt = bytecnt;
    }
}

int
fq_if_teardown_ifclassq(struct ifclassq *ifq)
{
    fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

    IFCQ_LOCK_ASSERT_HELD(ifq);
    VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);

    fq_if_destroy(fqs);
    ifq->ifcq_disc = NULL;

    return (ifclassq_detach(ifq));
}

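/*
 * fq_if_getqstats_ifclassq: export the counters of one class queue into
 * the user-visible fq_codel_classstats structure.
 */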
int
fq_if_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t qid,
    struct if_ifclassq_stats *ifqs)
{
    struct fq_codel_classstats *fcls;
    fq_if_classq_t *fq_cl;
    fq_if_t *fqs;

    if (qid >= FQ_IF_MAX_CLASSES)
        return (EINVAL);

    fqs = (fq_if_t *)ifq->ifcq_disc;
    fcls = &ifqs->ifqs_fq_codel_stats;

    fq_cl = &fqs->fqs_classq[qid];

    fcls->fcls_pri = fq_cl->fcl_pri;
    fcls->fcls_service_class = fq_cl->fcl_service_class;
    fcls->fcls_quantum = fq_cl->fcl_quantum;
    fcls->fcls_drr_max = fq_cl->fcl_drr_max;
    fcls->fcls_budget = fq_cl->fcl_budget;
    fcls->fcls_target_qdelay = fqs->fqs_target_qdelay;
    fcls->fcls_update_interval = fqs->fqs_update_interval;
    fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
    fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
    fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
    fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
    fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
    fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
    fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
    fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
    fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
    fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
    fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
    fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
    fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
    fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
    fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
    fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
    fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
    fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;

    return (0);
}