/*
 * Copyright (c) 2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/types.h>
#include <sys/param.h>
#include <kern/zalloc.h>
#include <net/if_var.h>

#include <net/classq/classq.h>
#include <net/classq/classq_fq_codel.h>
#include <net/pktsched/pktsched_fq_codel.h>

static size_t fq_if_size;
static struct zone *fq_if_zone;

static fq_if_t *fq_if_alloc(struct ifnet *ifp, int how);
static void fq_if_destroy(fq_if_t *fqs);
static void fq_if_classq_init(fq_if_t *fqs, u_int32_t priority,
    u_int32_t quantum, u_int32_t drr_max, u_int32_t svc_class);
static int fq_if_enqueue_classq(struct ifclassq *ifq, struct mbuf *m);
static struct mbuf *fq_if_dequeue_classq(struct ifclassq *ifq, cqdq_op_t);
static int fq_if_dequeue_classq_multi(struct ifclassq *, cqdq_op_t,
    u_int32_t, u_int32_t, struct mbuf **, struct mbuf **, u_int32_t *,
    u_int32_t *);
static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, u_int32_t,
    u_int32_t, struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
static int fq_if_request_classq(struct ifclassq *ifq, cqrq_t op, void *arg);
void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
static void fq_if_purge(fq_if_t *);
static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
static void fq_if_purge_flow(fq_if_t *, fq_t *, u_int32_t *, u_int32_t *);
static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl,
    bool add_to_old);
static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
    fq_t *fq, bool remove_hash);
static void fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
    fq_t *fq);

#define	FQ_IF_ZONE_MAX	32		/* Maximum elements in zone */
#define	FQ_IF_ZONE_NAME	"pktsched_fq_if"	/* zone for fq_if class */

#define	FQ_IF_FLOW_HASH_ID(_flowid_) \
	(((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)

#define	FQ_IF_CLASSQ_IDLE(_fcl_) \
	(STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
	STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))
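
/*
 * One-time scheduler setup: initialize the flow queue module and carve
 * out the zone that fq_if_alloc() draws fq_if_t instances from.
 */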

void
fq_codel_scheduler_init(void)
{
	/* Initialize the zone for flow queue structures */
	fq_codel_init();

	fq_if_size = sizeof (fq_if_t);
	fq_if_zone = zinit(fq_if_size, (FQ_IF_ZONE_MAX * fq_if_size), 0,
	    FQ_IF_ZONE_NAME);
	if (fq_if_zone == NULL) {
		panic("%s: failed allocating from %s", __func__,
		    (FQ_IF_ZONE_NAME));
		/* NOTREACHED */
	}
	zone_change(fq_if_zone, Z_EXPAND, TRUE);
	zone_change(fq_if_zone, Z_CALLERACCT, TRUE);
}
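
/*
 * Allocate the per-interface scheduler state and attach it to the
 * interface's send queue. The CoDel target queue delay and update
 * interval are derived from the interface by the ifclassq helpers.
 */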

static fq_if_t *
fq_if_alloc(struct ifnet *ifp, int how)
{
	fq_if_t *fqs;

	fqs = (how == M_WAITOK) ? zalloc(fq_if_zone) :
	    zalloc_noblock(fq_if_zone);
	if (fqs == NULL)
		return (NULL);

	bzero(fqs, fq_if_size);
	fqs->fqs_ifq = &ifp->if_snd;

	/* Calculate target queue delay */
	ifclassq_calc_target_qdelay(ifp, &fqs->fqs_target_qdelay);

	/* Calculate update interval */
	ifclassq_calc_update_interval(&fqs->fqs_update_interval);
	fqs->fqs_pkt_droplimit = FQ_IF_MAX_PKT_LIMIT;
	STAILQ_INIT(&fqs->fqs_fclist);
	return (fqs);
}

static void
fq_if_destroy(fq_if_t *fqs)
{
	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
	fq_if_purge(fqs);
	fqs->fqs_ifq = NULL;
	zfree(fq_if_zone, fqs);
}

static inline u_int32_t
fq_if_service_to_priority(mbuf_svc_class_t svc)
{
	u_int32_t pri;

	switch (svc) {
	case MBUF_SC_BK_SYS:
		pri = FQ_IF_BK_SYS_INDEX;
		break;
	case MBUF_SC_BK:
		pri = FQ_IF_BK_INDEX;
		break;
	case MBUF_SC_BE:
		pri = FQ_IF_BE_INDEX;
		break;
	case MBUF_SC_RD:
		pri = FQ_IF_RD_INDEX;
		break;
	case MBUF_SC_OAM:
		pri = FQ_IF_OAM_INDEX;
		break;
	case MBUF_SC_AV:
		pri = FQ_IF_AV_INDEX;
		break;
	case MBUF_SC_RV:
		pri = FQ_IF_RV_INDEX;
		break;
	case MBUF_SC_VI:
		pri = FQ_IF_VI_INDEX;
		break;
	case MBUF_SC_VO:
		pri = FQ_IF_VO_INDEX;
		break;
	case MBUF_SC_CTL:
		pri = FQ_IF_CTL_INDEX;
		break;
	default:
		pri = FQ_IF_BE_INDEX; /* Use best effort by default */
		break;
	}
	return (pri);
}
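
/*
 * Set up one service class: its DRR quantum (the byte credit a flow
 * earns per round), the drr_max multiplier used when recharging the
 * class budget, and the new/old flow lists the scheduler services.
 */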

static void
fq_if_classq_init(fq_if_t *fqs, u_int32_t pri, u_int32_t quantum,
    u_int32_t drr_max, u_int32_t svc_class)
{
	fq_if_classq_t *fq_cl;

	fq_cl = &fqs->fqs_classq[pri];

	VERIFY(pri >= 0 && pri < FQ_IF_MAX_CLASSES &&
	    fq_cl->fcl_quantum == 0);
	fq_cl->fcl_quantum = quantum;
	fq_cl->fcl_pri = pri;
	fq_cl->fcl_drr_max = drr_max;
	fq_cl->fcl_service_class = svc_class;
	STAILQ_INIT(&fq_cl->fcl_new_flows);
	STAILQ_INIT(&fq_cl->fcl_old_flows);
}
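
/*
 * Enqueue path: map the packet's service class to a class queue, drop
 * BK_SYS packets while that class is throttled, hand the packet to
 * fq_addq(), and mark the class as backlogged in the IB bitmap if it
 * is not already scheduled in ER or EB.
 */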

static int
fq_if_enqueue_classq(struct ifclassq *ifq, struct mbuf *m)
{
	u_int32_t pri;
	fq_if_t *fqs;
	fq_if_classq_t *fq_cl;
	int ret, len;
	mbuf_svc_class_t svc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	if (!(m->m_flags & M_PKTHDR)) {
		IFCQ_CONVERT_LOCK(ifq);
		m_freem(m);
		return (ENOBUFS);
	}

	len = m_length(m);
	fqs = (fq_if_t *)ifq->ifcq_disc;
	svc = mbuf_get_service_class(m);
	pri = fq_if_service_to_priority(svc);
	VERIFY(pri >= 0 && pri < FQ_IF_MAX_CLASSES);
	fq_cl = &fqs->fqs_classq[pri];

	if (svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1) {
		/* BK_SYS is currently throttled */
		fq_cl->fcl_stat.fcl_throttle_drops++;
		IFCQ_CONVERT_LOCK(ifq);
		m_freem(m);
		return (EQSUSPENDED);
	}

	ret = fq_addq(fqs, m, fq_cl);
	if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
		if (((fqs->fqs_bitmaps[FQ_IF_ER] | fqs->fqs_bitmaps[FQ_IF_EB]) &
		    (1 << pri)) == 0) {
			/*
			 * this group is not in ER or EB groups,
			 * mark it as IB
			 */
			pktsched_bit_set(pri, &fqs->fqs_bitmaps[FQ_IF_IB]);
		}
	}

	if (ret != 0) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
		} else {
			VERIFY(ret == CLASSQEQ_DROPPED ||
			    ret == CLASSQEQ_DROPPED_FC ||
			    ret == CLASSQEQ_DROPPED_SP);
			switch (ret) {
			case CLASSQEQ_DROPPED:
				return (ENOBUFS);
			case CLASSQEQ_DROPPED_FC:
				return (EQFULL);
			case CLASSQEQ_DROPPED_SP:
				return (EQSUSPENDED);
			}
		}
	}
	IFCQ_INC_LEN(ifq);
	IFCQ_INC_BYTES(ifq, len);
	return (ret);
}

static struct mbuf *
fq_if_dequeue_classq(struct ifclassq *ifq, cqdq_op_t op)
{
	struct mbuf *top;

	(void) fq_if_dequeue_classq_multi(ifq, op, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &top, NULL, NULL, NULL);

	return (top);
}
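
/*
 * Main scheduling loop. Each class occupies at most one of three
 * bitmaps: ER, EB and IB (backlogged but not currently eligible).
 * When ER and EB are both empty, every IB class is promoted to EB and
 * the scan restarts. Otherwise the highest-priority eligible class is
 * serviced; a class whose byte budget has run dry is recharged with
 * min(drr_max, flow count) * quantum, and a class that exhausts its
 * budget while still backlogged is demoted back to IB. Dequeued
 * packets are chained via m_nextpkt until the caller's packet and
 * byte limits are met.
 */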

static int
fq_if_dequeue_classq_multi(struct ifclassq *ifq, cqdq_op_t op,
    u_int32_t maxpktcnt, u_int32_t maxbytecnt, struct mbuf **first_packet,
    struct mbuf **last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt)
{
#pragma unused(op)
	struct mbuf *top = NULL, *tail = NULL, *first, *last;
	u_int32_t pktcnt = 0, bytecnt = 0, total_pktcnt, total_bytecnt;
	fq_if_t *fqs;
	fq_if_classq_t *fq_cl;
	int pri;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	fqs = (fq_if_t *)ifq->ifcq_disc;

	first = last = NULL;
	total_pktcnt = total_bytecnt = 0;

	for (;;) {
		if (fqs->fqs_bitmaps[FQ_IF_ER] == 0 &&
		    fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
			fqs->fqs_bitmaps[FQ_IF_EB] = fqs->fqs_bitmaps[FQ_IF_IB];
			fqs->fqs_bitmaps[FQ_IF_IB] = 0;
			if (fqs->fqs_bitmaps[FQ_IF_EB] == 0)
				break;
		}
		pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_ER]);
		if (pri == 0) {
			/*
			 * There are no ER flows, move the highest
			 * priority one from EB if there are any in that
			 * category
			 */
			pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_EB]);
			VERIFY(pri > 0);
			pktsched_bit_clr((pri - 1),
			    &fqs->fqs_bitmaps[FQ_IF_EB]);
			pktsched_bit_set((pri - 1),
			    &fqs->fqs_bitmaps[FQ_IF_ER]);
		}
		pri--; /* index starts at 0 */
		fq_cl = &fqs->fqs_classq[pri];

		if (fq_cl->fcl_budget <= 0) {
			/* Update the budget */
			fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
			    fq_cl->fcl_stat.fcl_flows_cnt) *
			    fq_cl->fcl_quantum);
			if (fq_cl->fcl_budget <= 0)
				goto state_change;
		}
		fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
		    (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt,
		    &bytecnt);
		if (top != NULL) {
			VERIFY(pktcnt > 0 && bytecnt > 0);
			if (first == NULL) {
				first = top;
				last = tail;
				total_pktcnt = pktcnt;
				total_bytecnt = bytecnt;
			} else {
				last->m_nextpkt = top;
				last = tail;
				total_pktcnt += pktcnt;
				total_bytecnt += bytecnt;
			}
			last->m_nextpkt = NULL;
			fq_cl->fcl_budget -= bytecnt;
			pktcnt = 0;
			bytecnt = 0;
		}

		/*
		 * If the class has exceeded the budget but still has data
		 * to send, move it to IB
		 */
state_change:
		if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
			if (fq_cl->fcl_budget <= 0) {
				pktsched_bit_set(pri,
				    &fqs->fqs_bitmaps[FQ_IF_IB]);
				pktsched_bit_clr(pri,
				    &fqs->fqs_bitmaps[FQ_IF_ER]);
			}
		} else {
			pktsched_bit_clr(pri, &fqs->fqs_bitmaps[FQ_IF_ER]);
			VERIFY(((fqs->fqs_bitmaps[FQ_IF_ER] |
			    fqs->fqs_bitmaps[FQ_IF_EB] |
			    fqs->fqs_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
			fq_cl->fcl_budget = 0;
		}
		if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt)
			break;
	}
	if (first != NULL) {
		if (first_packet != NULL)
			*first_packet = first;
		if (last_packet != NULL)
			*last_packet = last;
		if (retpktcnt != NULL)
			*retpktcnt = total_pktcnt;
		if (retbytecnt != NULL)
			*retbytecnt = total_bytecnt;
		IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
	} else {
		if (first_packet != NULL)
			*first_packet = NULL;
		if (last_packet != NULL)
			*last_packet = NULL;
		if (retpktcnt != NULL)
			*retpktcnt = 0;
		if (retbytecnt != NULL)
			*retbytecnt = 0;
	}
	return (0);
}
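
/*
 * Free every packet queued on one flow, charge the drops to the
 * interface, unlink the flow from its new/old list and from the flow
 * hash, and clear the class from all scheduler bitmaps once it has
 * gone idle.
 */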

static void
fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, u_int32_t *pktsp,
    u_int32_t *bytesp)
{
	fq_if_classq_t *fq_cl;
	u_int32_t pkts, bytes;
	struct mbuf *m;

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	pkts = bytes = 0;
	while ((m = fq_getq_flow(fqs, fq)) != NULL) {
		pkts++;
		bytes += m_length(m);
		m_freem(m);
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);

	if (fq->fq_flags & FQF_NEW_FLOW) {
		fq_if_empty_new_flow(fq, fq_cl, false);
	} else if (fq->fq_flags & FQF_OLD_FLOW) {
		fq_if_empty_old_flow(fqs, fq_cl, fq, false);
	}

	fq_if_destroy_flow(fqs, fq_cl, fq);

	if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
		int i;
		for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
			pktsched_bit_clr(fq_cl->fcl_pri,
			    &fqs->fqs_bitmaps[i]);
		}
	}
	if (pktsp != NULL)
		*pktsp = pkts;
	if (bytesp != NULL)
		*bytesp = bytes;
}

static void
fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_t *fq, *tfq;

	/*
	 * Take each flow from new/old flow list and flush mbufs
	 * in that flow
	 */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		fq_if_purge_flow(fqs, fq, NULL, NULL);
	}
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		fq_if_purge_flow(fqs, fq, NULL, NULL);
	}
	VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
	VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));

	STAILQ_INIT(&fq_cl->fcl_new_flows);
	STAILQ_INIT(&fq_cl->fcl_old_flows);
	fq_cl->fcl_budget = 0;
}

static void
fq_if_purge(fq_if_t *fqs)
{
	int i;

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	for (i = 0; i < FQ_IF_MAX_CLASSES; i++) {
		fq_if_purge_classq(fqs, &fqs->fqs_classq[i]);
	}

	VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));

	fqs->fqs_large_flow = NULL;
	for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) {
		VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i]));
	}

	bzero(&fqs->fqs_bitmaps, sizeof (fqs->fqs_bitmaps));

	IFCQ_LEN(fqs->fqs_ifq) = 0;
	IFCQ_BYTES(fqs->fqs_ifq) = 0;
}

static void
fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
{
	fq_t *fq;

	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
	req->packets = req->bytes = 0;
	VERIFY(req->flow != 0);

	fq = fq_if_hash_pkt(fqs, req->flow, req->sc, 0, FALSE);

	if (fq != NULL)
		fq_if_purge_flow(fqs, fq, &req->packets, &req->bytes);
}

static void
fq_if_event(fq_if_t *fqs, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);

	switch (ev) {
	case CLASSQ_EV_LINK_UP:
	case CLASSQ_EV_LINK_DOWN:
		fq_if_purge(fqs);
		break;
	default:
		break;
	}
}

static void
fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_if_purge_classq(fqs, fq_cl);
	fqs->fqs_throttle = 1;
	fq_cl->fcl_stat.fcl_throttle_on++;
}

static void
fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
	fqs->fqs_throttle = 0;
	fq_cl->fcl_stat.fcl_throttle_off++;
}
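
/*
 * Throttling is restricted to the background system class: suspending
 * BK_SYS purges anything it has queued, and fq_if_enqueue_classq()
 * then rejects new BK_SYS packets with EQSUSPENDED until the class is
 * resumed.
 */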

static int
fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
{
	struct ifclassq *ifq = fqs->fqs_ifq;
	int index;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!tr->set) {
		tr->level = fqs->fqs_throttle;
		return (0);
	}

	if (tr->level == fqs->fqs_throttle)
		return (EALREADY);

	/* Throttling is allowed on BK_SYS class only */
	index = fq_if_service_to_priority(MBUF_SC_BK_SYS);
	switch (tr->level) {
	case IFNET_THROTTLE_OFF:
		fq_if_classq_resume(fqs, &fqs->fqs_classq[index]);
		break;
	case IFNET_THROTTLE_OPPORTUNISTIC:
		fq_if_classq_suspend(fqs, &fqs->fqs_classq[index]);
		break;
	default:
		break;
	}
	return (0);
}

void
fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
{
	u_int32_t pri;
	fq_if_classq_t *fq_cl;

	if (stat == NULL)
		return;

	pri = fq_if_service_to_priority(stat->sc);
	fq_cl = &fqs->fqs_classq[pri];
	stat->packets = fq_cl->fcl_stat.fcl_pkt_cnt;
	stat->bytes = fq_cl->fcl_stat.fcl_byte_cnt;
}
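
/*
 * Control request dispatcher invoked by the ifclassq layer: purge
 * (all or per service class), link events, throttling and per-class
 * statistics all funnel through here.
 */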

static int
fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
{
	int err = 0;
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	/*
	 * These are usually slow operations, convert the lock ahead of time
	 */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	switch (rq) {
	case CLASSQRQ_PURGE:
		fq_if_purge(fqs);
		break;
	case CLASSQRQ_PURGE_SC:
		fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg);
		break;
	case CLASSQRQ_EVENT:
		fq_if_event(fqs, (cqev_t)arg);
		break;
	case CLASSQRQ_THROTTLE:
		fq_if_throttle(fqs, (cqrq_throttle_t *)arg);
		break;
	case CLASSQRQ_STAT_SC:
		fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg);
		break;
	}
	return (err);
}

int
fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags)
{
#pragma unused(flags)
	struct ifnet *ifp = ifq->ifcq_ifp;
	fq_if_t *fqs = NULL;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	fqs = fq_if_alloc(ifp, M_WAITOK);
	if (fqs == NULL)
		return (ENOMEM);

	fq_if_classq_init(fqs, FQ_IF_BK_SYS_INDEX, 1500, 2, MBUF_SC_BK_SYS);
	fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500, 2, MBUF_SC_BK);
	fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500, 4, MBUF_SC_BE);
	fq_if_classq_init(fqs, FQ_IF_RD_INDEX, 1500, 4, MBUF_SC_RD);
	fq_if_classq_init(fqs, FQ_IF_OAM_INDEX, 1500, 4, MBUF_SC_OAM);
	fq_if_classq_init(fqs, FQ_IF_AV_INDEX, 3000, 6, MBUF_SC_AV);
	fq_if_classq_init(fqs, FQ_IF_RV_INDEX, 3000, 6, MBUF_SC_RV);
	fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000, 6, MBUF_SC_VI);
	fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600, 8, MBUF_SC_VO);
	fq_if_classq_init(fqs, FQ_IF_CTL_INDEX, 600, 8, MBUF_SC_CTL);

	err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs,
	    fq_if_enqueue_classq, fq_if_dequeue_classq, NULL,
	    fq_if_dequeue_classq_multi, fq_if_request_classq);

	if (err != 0) {
		printf("%s: error from ifclassq_attach, "
		    "failed to attach fq_if: %d\n", __func__, err);
		fq_if_destroy(fqs);
	}
	return (err);
}
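
/*
 * Flow lookup: a packet belongs to the flow identified by its flow
 * hash plus service class index. FQ_IF_FLOW_HASH_ID() picks the hash
 * bucket from the upper bits of the flow id; on a miss with
 * create == TRUE the flow is allocated, stamped with its next CoDel
 * update time and inserted at the head of the bucket.
 */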

fq_t *
fq_if_hash_pkt(fq_if_t *fqs, u_int32_t flowid, mbuf_svc_class_t svc_class,
    u_int64_t now, boolean_t create)
{
	fq_t *fq = NULL;
	flowq_list_t *fq_list;
	fq_if_classq_t *fq_cl;
	u_int8_t fqs_hash_id;
	u_int8_t scidx;

	scidx = fq_if_service_to_priority(svc_class);

	fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);

	fq_list = &fqs->fqs_flows[fqs_hash_id];

	SLIST_FOREACH(fq, fq_list, fq_hashlink) {
		if (fq->fq_flowhash == flowid &&
		    fq->fq_sc_index == scidx)
			break;
	}
	if (fq == NULL && create == TRUE) {
		/* If the flow is not already on the list, allocate it */
		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		fq = fq_alloc(M_WAITOK);
		if (fq != NULL) {
			fq->fq_flowhash = flowid;
			fq->fq_sc_index = scidx;
			fq->fq_updatetime = now + fqs->fqs_update_interval;
			fq_cl = &fqs->fqs_classq[scidx];

			fq->fq_flags = FQF_FLOWCTL_CAPABLE;
			SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
			fq_cl->fcl_stat.fcl_flows_cnt++;
		}
	}

	/*
	 * If getq time is not set because this is the first packet or after
	 * idle time, set it now so that we can detect a stall.
	 */
	if (fq != NULL && fq->fq_getqtime == 0)
		fq->fq_getqtime = now;

	return (fq);
}

static void
fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq)
{
	u_int8_t hash_id;

	hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
	SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq,
	    fq_hashlink);
	fq_cl->fcl_stat.fcl_flows_cnt--;
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	fq_destroy(fq);
}

boolean_t
fq_if_at_drop_limit(fq_if_t *fqs)
{
	return (((IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
	    TRUE : FALSE));
}

static void
fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    bool remove_hash)
{
	/*
	 * Remove the flow queue if it is empty
	 * and delete it
	 */
	STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq,
	    fq_actlink);
	fq->fq_flags &= ~FQF_OLD_FLOW;
	fq_cl->fcl_stat.fcl_oldflows_cnt--;
	VERIFY(fq->fq_bytes == 0);

	if (remove_hash) {
		/* Remove from the hash list */
		fq_if_destroy_flow(fqs, fq_cl, fq);
	}
}

static void
fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl, bool add_to_old)
{
	/* Move to the end of old queue list */
	STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
	    flowq, fq_actlink);
	fq->fq_flags &= ~FQF_NEW_FLOW;
	fq_cl->fcl_stat.fcl_newflows_cnt--;

	if (add_to_old) {
		STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq,
		    fq_actlink);
		fq->fq_flags |= FQF_OLD_FLOW;
		fq_cl->fcl_stat.fcl_oldflows_cnt++;
	}
}
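
/*
 * Overflow policy: when the scheduler is at its drop limit (see
 * fq_if_at_drop_limit() above), a packet is taken from the head of
 * the largest flow, on the theory that the heaviest flow is the one
 * pushing the queue over its limit.
 */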

void
fq_if_drop_packet(fq_if_t *fqs)
{
	fq_t *fq = fqs->fqs_large_flow;
	struct mbuf *m = NULL;
	fq_if_classq_t *fq_cl;

	if (fq == NULL)
		return;
	/* mbufq can not be empty on the largest flow */
	VERIFY(!MBUFQ_EMPTY(&fq->fq_mbufq));

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];

	m = fq_getq_flow(fqs, fq);

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	if (MBUFQ_EMPTY(&fq->fq_mbufq)) {
		if (fq->fq_flags & FQF_OLD_FLOW) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, true);
		} else {
			VERIFY(fq->fq_flags & FQF_NEW_FLOW);
			fq_if_empty_new_flow(fq, fq_cl, true);
		}
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, 1, m_length(m));

	m_freem(m);
	fq_cl->fcl_stat.fcl_drop_overflow++;
}

void
fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
{
	fq_t *prev_fq = fqs->fqs_large_flow;

	if (prev_fq == NULL && !MBUFQ_EMPTY(&fq->fq_mbufq)) {
		fqs->fqs_large_flow = fq;
		return;
	} else if (prev_fq != NULL && fq->fq_bytes > prev_fq->fq_bytes) {
		fqs->fqs_large_flow = fq;
	}
}
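
/*
 * Flow control bookkeeping: queue a flow advisory entry for the
 * packet's (flowsrc, flowid) pair, at most once, so the owning socket
 * can be told to back off. fq_if_flow_feedback() below releases the
 * entry and wakes the flow back up.
 */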

boolean_t
fq_if_add_fcentry(fq_if_t *fqs, struct pkthdr *pkt, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce;
	u_int32_t flowsrc, flowid;

	flowsrc = pkt->pkt_flowsrc;
	flowid = pkt->pkt_flowid;

	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if (fce->fce_flowsrc == flowsrc &&
		    fce->fce_flowid == flowid) {
			/* Already on flowcontrol list */
			return (TRUE);
		}
	}

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	fce = flowadv_alloc_entry(M_WAITOK);
	if (fce != NULL) {
		fce->fce_flowsrc = flowsrc;
		fce->fce_flowid = flowid;
		/* XXX Add number of bytes in the queue */
		STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
		fq_cl->fcl_stat.fcl_flow_control++;
	}
	return ((fce != NULL) ? TRUE : FALSE);
}

void
fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce = NULL;

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if (fce->fce_flowid == fq->fq_flowhash)
			break;
	}
	if (fce != NULL) {
		STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry,
		    fce_link);
		STAILQ_NEXT(fce, fce_link) = NULL;
		flowadv_add_entry(fce);
		fq_cl->fcl_stat.fcl_flow_feedback++;
	}
	fq->fq_flags &= ~FQF_FLOWCTL_ON;
}
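
/*
 * Per-class DRR dequeue. New flows are serviced first and their
 * packets tagged PKTF_NEW_FLOW, then old flows. Each flow is drained
 * while it has deficit; when the deficit is spent the flow is
 * recharged with one quantum and rotated to the tail of the old list,
 * giving every backlogged flow an equal byte share per round.
 */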

static void
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, u_int32_t pktlimit,
    u_int32_t bytelimit, struct mbuf **top, struct mbuf **tail,
    u_int32_t *retpktcnt, u_int32_t *retbytecnt)
{
	fq_t *fq = NULL, *tfq = NULL;
	struct mbuf *m = NULL, *last = NULL;
	flowq_stailq_t temp_stailq;
	u_int32_t pktcnt, bytecnt, mlen;
	boolean_t limit_reached = FALSE;

	/*
	 * maximum byte limit should not be greater than the budget for
	 * this class
	 */
	if ((int32_t)bytelimit > fq_cl->fcl_budget)
		bytelimit = fq_cl->fcl_budget;

	VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);

	*top = NULL;
	pktcnt = bytecnt = 0;
	STAILQ_INIT(&temp_stailq);

	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		VERIFY((fq->fq_flags & (FQF_NEW_FLOW|FQF_OLD_FLOW)) ==
		    FQF_NEW_FLOW);
		while (fq->fq_deficit > 0 && limit_reached == FALSE &&
		    !MBUFQ_EMPTY(&fq->fq_mbufq)) {

			m = fq_getq_flow(fqs, fq);
			m->m_pkthdr.pkt_flags |= PKTF_NEW_FLOW;
			mlen = m_length(m);
			fq->fq_deficit -= mlen;

			if (*top == NULL) {
				*top = m;
			} else {
				last->m_nextpkt = m;
			}
			last = m;
			last->m_nextpkt = NULL;
			fq_cl->fcl_stat.fcl_dequeue++;
			fq_cl->fcl_stat.fcl_dequeue_bytes += mlen;

			pktcnt++;
			bytecnt += mlen;

			/* Check if the limit is reached */
			if (pktcnt >= pktlimit || bytecnt >= bytelimit)
				limit_reached = TRUE;
		}

		if (fq->fq_deficit <= 0 || MBUFQ_EMPTY(&fq->fq_mbufq)) {
			fq_if_empty_new_flow(fq, fq_cl, true);
			fq->fq_deficit += fq_cl->fcl_quantum;
		}
		if (limit_reached == TRUE)
			goto done;
	}

	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		VERIFY((fq->fq_flags & (FQF_NEW_FLOW|FQF_OLD_FLOW)) ==
		    FQF_OLD_FLOW);
		while (fq->fq_deficit > 0 && !MBUFQ_EMPTY(&fq->fq_mbufq) &&
		    limit_reached == FALSE) {
			m = fq_getq_flow(fqs, fq);
			mlen = m_length(m);
			fq->fq_deficit -= mlen;
			if (*top == NULL) {
				*top = m;
			} else {
				last->m_nextpkt = m;
			}
			last = m;
			last->m_nextpkt = NULL;
			fq_cl->fcl_stat.fcl_dequeue++;
			fq_cl->fcl_stat.fcl_dequeue_bytes += mlen;

			pktcnt++;
			bytecnt += mlen;

			/* Check if the limit is reached */
			if (pktcnt >= pktlimit || bytecnt >= bytelimit)
				limit_reached = TRUE;
		}

		if (MBUFQ_EMPTY(&fq->fq_mbufq)) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, true);
		} else if (fq->fq_deficit <= 0) {
			STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
			    flowq, fq_actlink);
			/*
			 * Move to the end of the old queues list. We do not
			 * need to update the flow count since this flow
			 * will be added to the tail again
			 */
			STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
			fq->fq_deficit += fq_cl->fcl_quantum;
		}
		if (limit_reached == TRUE)
			break;
	}

done:
	if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
		STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
	} else if (!STAILQ_EMPTY(&temp_stailq)) {
		fq_cl->fcl_old_flows = temp_stailq;
	}

	if (last != NULL) {
		VERIFY(*top != NULL);
		if (tail != NULL)
			*tail = last;
		if (retpktcnt != NULL)
			*retpktcnt = pktcnt;
		if (retbytecnt != NULL)
			*retbytecnt = bytecnt;
	}
}

int
fq_if_teardown_ifclassq(struct ifclassq *ifq)
{
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_destroy(fqs);
	ifq->ifcq_disc = NULL;

	return (ifclassq_detach(ifq));
}

int
fq_if_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t qid,
    struct if_ifclassq_stats *ifqs)
{
	struct fq_codel_classstats *fcls;
	fq_if_classq_t *fq_cl;
	fq_if_t *fqs;

	if (qid >= FQ_IF_MAX_CLASSES)
		return (EINVAL);

	fqs = (fq_if_t *)ifq->ifcq_disc;
	fcls = &ifqs->ifqs_fq_codel_stats;

	fq_cl = &fqs->fqs_classq[qid];

	fcls->fcls_pri = fq_cl->fcl_pri;
	fcls->fcls_service_class = fq_cl->fcl_service_class;
	fcls->fcls_quantum = fq_cl->fcl_quantum;
	fcls->fcls_drr_max = fq_cl->fcl_drr_max;
	fcls->fcls_budget = fq_cl->fcl_budget;
	fcls->fcls_target_qdelay = fqs->fqs_target_qdelay;
	fcls->fcls_update_interval = fqs->fqs_update_interval;
	fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
	fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
	fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
	fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
	fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
	fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
	fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
	fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
	fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
	fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
	fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
	fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
	fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
	fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
	fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
	fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
	fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
	fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;

	return (0);
}