]>
Commit | Line | Data |
---|---|---|
39037602 | 1 | /* |
5ba3f43e | 2 | * Copyright (c) 2016-2017 Apple Inc. All rights reserved. |
39037602 A |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <sys/types.h> | |
30 | #include <sys/param.h> | |
31 | #include <kern/zalloc.h> | |
32 | #include <net/if_var.h> | |
33 | #include <net/if.h> | |
34 | #include <net/classq/classq.h> | |
35 | #include <net/classq/classq_fq_codel.h> | |
36 | #include <net/pktsched/pktsched_fq_codel.h> | |
37 | ||
39037602 A |
38 | static size_t fq_if_size; |
39 | static struct zone *fq_if_zone; | |
40 | ||
5ba3f43e | 41 | static fq_if_t *fq_if_alloc(struct ifnet *, classq_pkt_type_t); |
39037602 A |
42 | static void fq_if_destroy(fq_if_t *fqs); |
43 | static void fq_if_classq_init(fq_if_t *fqs, u_int32_t priority, | |
44 | u_int32_t quantum, u_int32_t drr_max, u_int32_t svc_class); | |
5ba3f43e A |
45 | static int fq_if_enqueue_classq(struct ifclassq *ifq, void *p, |
46 | classq_pkt_type_t ptype, boolean_t *pdrop); | |
47 | static void *fq_if_dequeue_classq(struct ifclassq *, classq_pkt_type_t *); | |
48 | static int fq_if_dequeue_classq_multi(struct ifclassq *, u_int32_t, | |
49 | u_int32_t, void **, void **, u_int32_t *, u_int32_t *, classq_pkt_type_t *); | |
50 | static void *fq_if_dequeue_sc_classq(struct ifclassq *, mbuf_svc_class_t, | |
51 | classq_pkt_type_t *); | |
52 | static int fq_if_dequeue_sc_classq_multi(struct ifclassq *, | |
53 | mbuf_svc_class_t, u_int32_t, u_int32_t, void **, | |
54 | void **, u_int32_t *, u_int32_t *, classq_pkt_type_t *); | |
39037602 | 55 | static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, u_int32_t, |
5ba3f43e A |
56 | u_int32_t, void **, void **, u_int32_t *, u_int32_t *, |
57 | boolean_t drvmgmt, classq_pkt_type_t *); | |
39037602 A |
58 | static int fq_if_request_classq(struct ifclassq *ifq, cqrq_t op, void *arg); |
59 | void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat); | |
60 | static void fq_if_purge(fq_if_t *); | |
61 | static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *); | |
62 | static void fq_if_purge_flow(fq_if_t *, fq_t *, u_int32_t *, u_int32_t *); | |
63 | static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl, | |
64 | bool add_to_old); | |
65 | static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, | |
66 | fq_t *fq, bool remove_hash); | |
39037602 A |
67 | |
68 | #define FQ_IF_ZONE_MAX 32 /* Maximum elements in zone */ | |
69 | #define FQ_IF_ZONE_NAME "pktsched_fq_if" /* zone for fq_if class */ | |
70 | ||
71 | #define FQ_IF_FLOW_HASH_ID(_flowid_) \ | |
72 | (((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK) | |
73 | ||
74 | #define FQ_IF_CLASSQ_IDLE(_fcl_) \ | |
75 | (STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \ | |
76 | STAILQ_EMPTY(&(_fcl_)->fcl_old_flows)) | |
77 | ||
5ba3f43e A |
78 | typedef void (* fq_if_append_pkt_t)(void *, void *); |
79 | typedef boolean_t (* fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *, | |
80 | u_int32_t, u_int32_t, void **, void **, u_int32_t *, u_int32_t *, | |
81 | boolean_t *, u_int32_t); | |
82 | ||
83 | static void | |
84 | fq_if_append_mbuf(void *pkt, void *next_pkt) | |
85 | { | |
86 | ((mbuf_t)pkt)->m_nextpkt = (mbuf_t)next_pkt; | |
87 | } | |
88 | ||
89 | ||
90 | ||
91 | static boolean_t | |
92 | fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq, | |
93 | u_int32_t byte_limit, u_int32_t pkt_limit, void **top, void **last, | |
94 | u_int32_t *byte_cnt, u_int32_t *pkt_cnt, boolean_t *qempty, | |
95 | u_int32_t pflags) | |
96 | { | |
97 | struct mbuf *m; | |
98 | u_int32_t plen; | |
99 | pktsched_pkt_t pkt; | |
100 | boolean_t limit_reached = FALSE; | |
101 | struct ifclassq *ifq = fqs->fqs_ifq; | |
102 | struct ifnet *ifp = ifq->ifcq_ifp; | |
103 | ||
104 | while (fq->fq_deficit > 0 && limit_reached == FALSE && | |
105 | !MBUFQ_EMPTY(&fq->fq_mbufq)) { | |
106 | ||
107 | _PKTSCHED_PKT_INIT(&pkt); | |
108 | m = fq_getq_flow(fqs, fq, &pkt); | |
109 | ASSERT(pkt.pktsched_ptype == QP_MBUF); | |
110 | ||
111 | plen = pktsched_get_pkt_len(&pkt); | |
112 | fq->fq_deficit -= plen; | |
113 | m->m_pkthdr.pkt_flags |= pflags; | |
114 | ||
115 | if (*top == NULL) { | |
116 | *top = m; | |
117 | } else { | |
118 | ASSERT(*last != NULL); | |
119 | ASSERT((*(struct mbuf **)last)->m_nextpkt == NULL); | |
120 | (*(struct mbuf **)last)->m_nextpkt = m; | |
121 | } | |
122 | *last = m; | |
123 | (*(mbuf_t *)last)->m_nextpkt = NULL; | |
124 | fq_cl->fcl_stat.fcl_dequeue++; | |
125 | fq_cl->fcl_stat.fcl_dequeue_bytes += plen; | |
126 | *pkt_cnt += 1; | |
127 | *byte_cnt += plen; | |
128 | ||
129 | ifclassq_set_packet_metadata(ifq, ifp, m, QP_MBUF); | |
130 | ||
131 | /* Check if the limit is reached */ | |
132 | if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) | |
133 | limit_reached = TRUE; | |
134 | } | |
135 | ||
136 | *qempty = MBUFQ_EMPTY(&fq->fq_mbufq); | |
137 | return (limit_reached); | |
138 | } | |
139 | ||
39037602 A |
140 | void |
141 | fq_codel_scheduler_init(void) | |
142 | { | |
143 | /* Initialize the zone for flow queue structures */ | |
144 | fq_codel_init(); | |
145 | ||
146 | fq_if_size = sizeof (fq_if_t); | |
147 | fq_if_zone = zinit(fq_if_size, (FQ_IF_ZONE_MAX * fq_if_size), 0, | |
148 | FQ_IF_ZONE_NAME); | |
149 | if (fq_if_zone == NULL) { | |
150 | panic("%s: failed allocating from %s", __func__, | |
151 | (FQ_IF_ZONE_NAME)); | |
152 | } | |
153 | zone_change(fq_if_zone, Z_EXPAND, TRUE); | |
154 | zone_change(fq_if_zone, Z_CALLERACCT, TRUE); | |
155 | ||
156 | } | |
157 | ||
158 | fq_if_t * | |
5ba3f43e | 159 | fq_if_alloc(struct ifnet *ifp, classq_pkt_type_t ptype) |
39037602 A |
160 | { |
161 | fq_if_t *fqs; | |
5ba3f43e | 162 | fqs = zalloc(fq_if_zone); |
39037602 A |
163 | if (fqs == NULL) |
164 | return (NULL); | |
165 | ||
166 | bzero(fqs, fq_if_size); | |
167 | fqs->fqs_ifq = &ifp->if_snd; | |
5ba3f43e | 168 | fqs->fqs_ptype = ptype; |
39037602 A |
169 | |
170 | /* Calculate target queue delay */ | |
171 | ifclassq_calc_target_qdelay(ifp, &fqs->fqs_target_qdelay); | |
172 | ||
173 | /* Calculate update interval */ | |
174 | ifclassq_calc_update_interval(&fqs->fqs_update_interval); | |
5ba3f43e A |
175 | |
176 | /* Configure packet drop limit across all queues */ | |
177 | fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(&ifp->if_snd); | |
39037602 A |
178 | STAILQ_INIT(&fqs->fqs_fclist); |
179 | return (fqs); | |
180 | } | |
181 | ||
182 | void | |
183 | fq_if_destroy(fq_if_t *fqs) | |
184 | { | |
39037602 A |
185 | fq_if_purge(fqs); |
186 | fqs->fqs_ifq = NULL; | |
187 | zfree(fq_if_zone, fqs); | |
188 | } | |
189 | ||
190 | static inline u_int32_t | |
5ba3f43e | 191 | fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc) |
39037602 A |
192 | { |
193 | u_int32_t pri; | |
194 | ||
5ba3f43e A |
195 | if (fqs->fqs_flags & FQS_DRIVER_MANAGED) { |
196 | switch (svc) { | |
197 | case MBUF_SC_BK_SYS: | |
198 | case MBUF_SC_BK: | |
199 | pri = FQ_IF_BK_INDEX; | |
200 | break; | |
201 | case MBUF_SC_BE: | |
202 | case MBUF_SC_RD: | |
203 | case MBUF_SC_OAM: | |
204 | pri = FQ_IF_BE_INDEX; | |
205 | break; | |
206 | case MBUF_SC_AV: | |
207 | case MBUF_SC_RV: | |
208 | case MBUF_SC_VI: | |
209 | pri = FQ_IF_VI_INDEX; | |
210 | break; | |
211 | case MBUF_SC_VO: | |
212 | case MBUF_SC_CTL: | |
213 | pri = FQ_IF_VO_INDEX; | |
214 | break; | |
215 | default: | |
216 | pri = FQ_IF_BE_INDEX; /* Use best effort by default */ | |
217 | break; | |
218 | } | |
219 | return (pri); | |
220 | } | |
221 | ||
222 | /* scheduler is not managed by the driver */ | |
39037602 A |
223 | switch (svc) { |
224 | case MBUF_SC_BK_SYS: | |
225 | pri = FQ_IF_BK_SYS_INDEX; | |
226 | break; | |
227 | case MBUF_SC_BK: | |
228 | pri = FQ_IF_BK_INDEX; | |
229 | break; | |
230 | case MBUF_SC_BE: | |
231 | pri = FQ_IF_BE_INDEX; | |
232 | break; | |
233 | case MBUF_SC_RD: | |
234 | pri = FQ_IF_RD_INDEX; | |
235 | break; | |
236 | case MBUF_SC_OAM: | |
237 | pri = FQ_IF_OAM_INDEX; | |
238 | break; | |
239 | case MBUF_SC_AV: | |
240 | pri = FQ_IF_AV_INDEX; | |
241 | break; | |
242 | case MBUF_SC_RV: | |
243 | pri = FQ_IF_RV_INDEX; | |
244 | break; | |
245 | case MBUF_SC_VI: | |
246 | pri = FQ_IF_VI_INDEX; | |
247 | break; | |
248 | case MBUF_SC_VO: | |
249 | pri = FQ_IF_VO_INDEX; | |
250 | break; | |
251 | case MBUF_SC_CTL: | |
252 | pri = FQ_IF_CTL_INDEX; | |
253 | break; | |
254 | default: | |
255 | pri = FQ_IF_BE_INDEX; /* Use best effort by default */ | |
256 | break; | |
257 | } | |
258 | return (pri); | |
259 | } | |
260 | ||
261 | void | |
262 | fq_if_classq_init(fq_if_t *fqs, u_int32_t pri, u_int32_t quantum, | |
263 | u_int32_t drr_max, u_int32_t svc_class) | |
264 | { | |
265 | fq_if_classq_t *fq_cl; | |
266 | ||
267 | fq_cl = &fqs->fqs_classq[pri]; | |
268 | ||
269 | VERIFY(pri >= 0 && pri < FQ_IF_MAX_CLASSES && | |
270 | fq_cl->fcl_quantum == 0); | |
271 | fq_cl->fcl_quantum = quantum; | |
272 | fq_cl->fcl_pri = pri; | |
273 | fq_cl->fcl_drr_max = drr_max; | |
274 | fq_cl->fcl_service_class = svc_class; | |
275 | STAILQ_INIT(&fq_cl->fcl_new_flows); | |
276 | STAILQ_INIT(&fq_cl->fcl_old_flows); | |
277 | } | |
278 | ||
279 | int | |
5ba3f43e A |
280 | fq_if_enqueue_classq(struct ifclassq *ifq, void *p, classq_pkt_type_t ptype, |
281 | boolean_t *pdrop) | |
39037602 A |
282 | { |
283 | u_int32_t pri; | |
284 | fq_if_t *fqs; | |
285 | fq_if_classq_t *fq_cl; | |
286 | int ret, len; | |
287 | mbuf_svc_class_t svc; | |
5ba3f43e | 288 | pktsched_pkt_t pkt; |
39037602 A |
289 | |
290 | IFCQ_LOCK_ASSERT_HELD(ifq); | |
5ba3f43e | 291 | if ((ptype == QP_MBUF) && !(((mbuf_t)p)->m_flags & M_PKTHDR)) { |
39037602 | 292 | IFCQ_CONVERT_LOCK(ifq); |
5ba3f43e A |
293 | m_freem((mbuf_t)p); |
294 | *pdrop = TRUE; | |
39037602 A |
295 | return (ENOBUFS); |
296 | } | |
5ba3f43e | 297 | pktsched_pkt_encap(&pkt, ptype, p); |
39037602 A |
298 | |
299 | fqs = (fq_if_t *)ifq->ifcq_disc; | |
5ba3f43e A |
300 | svc = pktsched_get_pkt_svc(&pkt); |
301 | pri = fq_if_service_to_priority(fqs, svc); | |
39037602 A |
302 | VERIFY(pri >= 0 && pri < FQ_IF_MAX_CLASSES); |
303 | fq_cl = &fqs->fqs_classq[pri]; | |
304 | ||
305 | if (svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1) { | |
306 | /* BK_SYS is currently throttled */ | |
307 | fq_cl->fcl_stat.fcl_throttle_drops++; | |
308 | IFCQ_CONVERT_LOCK(ifq); | |
5ba3f43e A |
309 | pktsched_free_pkt(&pkt); |
310 | *pdrop = TRUE; | |
39037602 A |
311 | return (EQSUSPENDED); |
312 | } | |
313 | ||
5ba3f43e A |
314 | len = pktsched_get_pkt_len(&pkt); |
315 | ret = fq_addq(fqs, &pkt, fq_cl); | |
316 | if (!(fqs->fqs_flags & FQS_DRIVER_MANAGED) && | |
317 | !FQ_IF_CLASSQ_IDLE(fq_cl)) { | |
39037602 A |
318 | if (((fqs->fqs_bitmaps[FQ_IF_ER] | fqs->fqs_bitmaps[FQ_IF_EB]) & |
319 | (1 << pri)) == 0) { | |
320 | /* | |
321 | * this group is not in ER or EB groups, | |
322 | * mark it as IB | |
323 | */ | |
324 | pktsched_bit_set(pri, &fqs->fqs_bitmaps[FQ_IF_IB]); | |
325 | } | |
326 | } | |
327 | ||
328 | if (ret != 0) { | |
329 | if (ret == CLASSQEQ_SUCCESS_FC) { | |
330 | /* packet enqueued, return advisory feedback */ | |
331 | ret = EQFULL; | |
5ba3f43e | 332 | *pdrop = FALSE; |
39037602 | 333 | } else { |
5ba3f43e A |
334 | *pdrop = TRUE; |
335 | VERIFY(ret == CLASSQEQ_DROP || | |
336 | ret == CLASSQEQ_DROP_FC || | |
337 | ret == CLASSQEQ_DROP_SP); | |
338 | pktsched_free_pkt(&pkt); | |
39037602 | 339 | switch (ret) { |
5ba3f43e | 340 | case CLASSQEQ_DROP: |
39037602 | 341 | return (ENOBUFS); |
5ba3f43e | 342 | case CLASSQEQ_DROP_FC: |
39037602 | 343 | return (EQFULL); |
5ba3f43e | 344 | case CLASSQEQ_DROP_SP: |
39037602 A |
345 | return (EQSUSPENDED); |
346 | } | |
347 | } | |
5ba3f43e A |
348 | } else { |
349 | *pdrop = FALSE; | |
39037602 A |
350 | } |
351 | IFCQ_INC_LEN(ifq); | |
352 | IFCQ_INC_BYTES(ifq, len); | |
353 | return (ret); | |
354 | } | |
355 | ||
5ba3f43e A |
356 | static void * |
357 | fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_type_t *ptype) | |
39037602 | 358 | { |
5ba3f43e | 359 | void *top; |
39037602 | 360 | |
5ba3f43e A |
361 | (void) fq_if_dequeue_classq_multi(ifq, 1, |
362 | CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &top, NULL, NULL, NULL, ptype); | |
363 | return (top); | |
364 | } | |
39037602 | 365 | |
5ba3f43e A |
366 | static void * |
367 | fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc, | |
368 | classq_pkt_type_t *ptype) | |
369 | { | |
370 | void *top; | |
371 | fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc; | |
372 | fq_if_classq_t *fq_cl; | |
373 | u_int32_t pri; | |
374 | ||
375 | pri = fq_if_service_to_priority(fqs, svc); | |
376 | fq_cl = &fqs->fqs_classq[pri]; | |
377 | ||
378 | fq_if_dequeue(fqs, fq_cl, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, | |
379 | &top, NULL, NULL, NULL, TRUE, ptype); | |
39037602 A |
380 | return (top); |
381 | } | |
382 | ||
383 | int | |
5ba3f43e A |
384 | fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt, |
385 | u_int32_t maxbytecnt, void **first_packet, | |
386 | void **last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt, | |
387 | classq_pkt_type_t *ptype) | |
39037602 | 388 | { |
5ba3f43e | 389 | void *top = NULL, *tail = NULL, *first, *last; |
39037602 A |
390 | u_int32_t pktcnt = 0, bytecnt = 0, total_pktcnt, total_bytecnt; |
391 | fq_if_t *fqs; | |
392 | fq_if_classq_t *fq_cl; | |
393 | int pri; | |
5ba3f43e | 394 | fq_if_append_pkt_t append_pkt; |
39037602 A |
395 | |
396 | IFCQ_LOCK_ASSERT_HELD(ifq); | |
397 | ||
398 | fqs = (fq_if_t *)ifq->ifcq_disc; | |
399 | ||
5ba3f43e A |
400 | switch (fqs->fqs_ptype) { |
401 | case QP_MBUF: | |
402 | append_pkt = fq_if_append_mbuf; | |
403 | break; | |
404 | ||
405 | ||
406 | default: | |
407 | VERIFY(0); | |
408 | /* NOTREACHED */ | |
409 | } | |
410 | ||
39037602 A |
411 | first = last = NULL; |
412 | total_pktcnt = total_bytecnt = 0; | |
5ba3f43e | 413 | *ptype = fqs->fqs_ptype; |
39037602 A |
414 | |
415 | for (;;) { | |
5ba3f43e | 416 | classq_pkt_type_t tmp_ptype; |
39037602 A |
417 | if (fqs->fqs_bitmaps[FQ_IF_ER] == 0 && |
418 | fqs->fqs_bitmaps[FQ_IF_EB] == 0) { | |
419 | fqs->fqs_bitmaps[FQ_IF_EB] = fqs->fqs_bitmaps[FQ_IF_IB]; | |
420 | fqs->fqs_bitmaps[FQ_IF_IB] = 0; | |
421 | if (fqs->fqs_bitmaps[FQ_IF_EB] == 0) | |
422 | break; | |
423 | } | |
424 | pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_ER]); | |
425 | if (pri == 0) { | |
426 | /* | |
427 | * There are no ER flows, move the highest | |
428 | * priority one from EB if there are any in that | |
429 | * category | |
430 | */ | |
431 | pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_EB]); | |
432 | VERIFY(pri > 0); | |
433 | pktsched_bit_clr((pri - 1), | |
434 | &fqs->fqs_bitmaps[FQ_IF_EB]); | |
435 | pktsched_bit_set((pri - 1), | |
436 | &fqs->fqs_bitmaps[FQ_IF_ER]); | |
437 | } | |
438 | pri--; /* index starts at 0 */ | |
439 | fq_cl = &fqs->fqs_classq[pri]; | |
440 | ||
441 | if (fq_cl->fcl_budget <= 0) { | |
442 | /* Update the budget */ | |
443 | fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max, | |
444 | fq_cl->fcl_stat.fcl_flows_cnt) * | |
445 | fq_cl->fcl_quantum); | |
446 | if (fq_cl->fcl_budget <= 0) | |
447 | goto state_change; | |
448 | } | |
449 | fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt), | |
450 | (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt, | |
5ba3f43e | 451 | &bytecnt, FALSE, &tmp_ptype); |
39037602 | 452 | if (top != NULL) { |
5ba3f43e A |
453 | ASSERT(tmp_ptype == *ptype); |
454 | ASSERT(pktcnt > 0 && bytecnt > 0); | |
39037602 A |
455 | if (first == NULL) { |
456 | first = top; | |
457 | last = tail; | |
458 | total_pktcnt = pktcnt; | |
459 | total_bytecnt = bytecnt; | |
460 | } else { | |
5ba3f43e | 461 | append_pkt(last, top); |
39037602 A |
462 | last = tail; |
463 | total_pktcnt += pktcnt; | |
464 | total_bytecnt += bytecnt; | |
465 | } | |
5ba3f43e | 466 | append_pkt(last, NULL); |
39037602 A |
467 | fq_cl->fcl_budget -= bytecnt; |
468 | pktcnt = 0; | |
469 | bytecnt = 0; | |
470 | } | |
471 | ||
472 | /* | |
473 | * If the class has exceeded the budget but still has data | |
474 | * to send, move it to IB | |
475 | */ | |
476 | state_change: | |
477 | if (!FQ_IF_CLASSQ_IDLE(fq_cl)) { | |
478 | if (fq_cl->fcl_budget <= 0) { | |
479 | pktsched_bit_set(pri, | |
480 | &fqs->fqs_bitmaps[FQ_IF_IB]); | |
481 | pktsched_bit_clr(pri, | |
482 | &fqs->fqs_bitmaps[FQ_IF_ER]); | |
483 | } | |
484 | } else { | |
485 | pktsched_bit_clr(pri, &fqs->fqs_bitmaps[FQ_IF_ER]); | |
486 | VERIFY(((fqs->fqs_bitmaps[FQ_IF_ER] | | |
487 | fqs->fqs_bitmaps[FQ_IF_EB] | | |
488 | fqs->fqs_bitmaps[FQ_IF_IB])&(1 << pri)) == 0); | |
489 | fq_cl->fcl_budget = 0; | |
490 | } | |
491 | if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt) | |
492 | break; | |
493 | } | |
494 | if (first != NULL) { | |
495 | if (first_packet != NULL) | |
496 | *first_packet = first; | |
497 | if (last_packet != NULL) | |
498 | *last_packet = last; | |
499 | if (retpktcnt != NULL) | |
500 | *retpktcnt = total_pktcnt; | |
501 | if (retbytecnt != NULL) | |
502 | *retbytecnt = total_bytecnt; | |
503 | IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt); | |
504 | } else { | |
505 | if (first_packet != NULL) | |
506 | *first_packet = NULL; | |
507 | if (last_packet != NULL) | |
508 | *last_packet = NULL; | |
509 | if (retpktcnt != NULL) | |
510 | *retpktcnt = 0; | |
511 | if (retbytecnt != NULL) | |
512 | *retbytecnt = 0; | |
513 | } | |
514 | return (0); | |
515 | } | |
516 | ||
5ba3f43e A |
517 | int |
518 | fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc, | |
519 | u_int32_t maxpktcnt, u_int32_t maxbytecnt, void **first_packet, | |
520 | void **last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt, | |
521 | classq_pkt_type_t *ptype) | |
522 | { | |
523 | #pragma unused(maxpktcnt, maxbytecnt, first_packet, last_packet, retpktcnt, retbytecnt) | |
524 | fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc; | |
525 | u_int32_t pri; | |
526 | u_int32_t total_pktcnt = 0, total_bytecnt = 0; | |
527 | fq_if_classq_t *fq_cl; | |
528 | void *first = NULL, *last = NULL; | |
529 | fq_if_append_pkt_t append_pkt; | |
530 | ||
531 | switch (fqs->fqs_ptype) { | |
532 | case QP_MBUF: | |
533 | append_pkt = fq_if_append_mbuf; | |
534 | break; | |
535 | ||
536 | ||
537 | default: | |
538 | VERIFY(0); | |
539 | /* NOTREACHED */ | |
540 | } | |
541 | ||
542 | pri = fq_if_service_to_priority(fqs, svc); | |
543 | fq_cl = &fqs->fqs_classq[pri]; | |
544 | ||
545 | /* | |
546 | * Now we have the queue for a particular service class. We need | |
547 | * to dequeue as many packets as needed, first from the new flows | |
548 | * and then from the old flows. | |
549 | */ | |
550 | while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt && | |
551 | fq_cl->fcl_stat.fcl_pkt_cnt > 0) { | |
552 | void *top, *tail; | |
553 | u_int32_t pktcnt = 0, bytecnt = 0; | |
554 | fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt), | |
555 | (maxbytecnt - total_bytecnt), &top, &tail, &pktcnt, | |
556 | &bytecnt, TRUE, ptype); | |
557 | if (first == NULL) { | |
558 | first = top; | |
559 | total_pktcnt = pktcnt; | |
560 | total_bytecnt = bytecnt; | |
561 | } else { | |
562 | append_pkt(last, top); | |
563 | total_pktcnt += pktcnt; | |
564 | total_bytecnt += bytecnt; | |
565 | } | |
566 | last = tail; | |
567 | } | |
568 | if (first != NULL) { | |
569 | if (first_packet != NULL) | |
570 | *first_packet = first; | |
571 | if (last_packet != NULL) | |
572 | *last_packet = last; | |
573 | if (retpktcnt != NULL) | |
574 | *retpktcnt = total_pktcnt; | |
575 | if (retbytecnt != NULL) | |
576 | *retbytecnt = total_bytecnt; | |
577 | } else { | |
578 | if (first_packet != NULL) | |
579 | *first_packet = NULL; | |
580 | if (last_packet != NULL) | |
581 | *last_packet = NULL; | |
582 | if (retpktcnt != NULL) | |
583 | *retpktcnt = 0; | |
584 | if (retbytecnt != NULL) | |
585 | *retbytecnt = 0; | |
586 | } | |
587 | return (0); | |
588 | } | |
589 | ||
39037602 A |
590 | static void |
591 | fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, u_int32_t *pktsp, | |
592 | u_int32_t *bytesp) | |
593 | { | |
594 | fq_if_classq_t *fq_cl; | |
595 | u_int32_t pkts, bytes; | |
5ba3f43e | 596 | pktsched_pkt_t pkt; |
39037602 A |
597 | |
598 | fq_cl = &fqs->fqs_classq[fq->fq_sc_index]; | |
599 | pkts = bytes = 0; | |
5ba3f43e A |
600 | _PKTSCHED_PKT_INIT(&pkt); |
601 | while (fq_getq_flow(fqs, fq, &pkt) != NULL) { | |
39037602 | 602 | pkts++; |
5ba3f43e A |
603 | bytes += pktsched_get_pkt_len(&pkt); |
604 | pktsched_free_pkt(&pkt); | |
39037602 A |
605 | } |
606 | IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes); | |
607 | ||
608 | if (fq->fq_flags & FQF_NEW_FLOW) { | |
609 | fq_if_empty_new_flow(fq, fq_cl, false); | |
610 | } else if (fq->fq_flags & FQF_OLD_FLOW) { | |
611 | fq_if_empty_old_flow(fqs, fq_cl, fq, false); | |
612 | } | |
613 | ||
614 | fq_if_destroy_flow(fqs, fq_cl, fq); | |
615 | ||
616 | if (FQ_IF_CLASSQ_IDLE(fq_cl)) { | |
617 | int i; | |
618 | for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) { | |
619 | pktsched_bit_clr(fq_cl->fcl_pri, | |
620 | &fqs->fqs_bitmaps[i]); | |
621 | } | |
622 | } | |
623 | if (pktsp != NULL) | |
624 | *pktsp = pkts; | |
625 | if (bytesp != NULL) | |
626 | *bytesp = bytes; | |
627 | } | |
628 | ||
629 | static void | |
630 | fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl) | |
631 | { | |
632 | fq_t *fq, *tfq; | |
633 | /* | |
634 | * Take each flow from new/old flow list and flush mbufs | |
635 | * in that flow | |
636 | */ | |
637 | STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) { | |
638 | fq_if_purge_flow(fqs, fq, NULL, NULL); | |
639 | } | |
640 | STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) { | |
641 | fq_if_purge_flow(fqs, fq, NULL, NULL); | |
642 | } | |
643 | VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows)); | |
644 | VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows)); | |
645 | ||
646 | STAILQ_INIT(&fq_cl->fcl_new_flows); | |
647 | STAILQ_INIT(&fq_cl->fcl_old_flows); | |
648 | fq_cl->fcl_budget = 0; | |
649 | } | |
650 | ||
651 | static void | |
652 | fq_if_purge(fq_if_t *fqs) | |
653 | { | |
654 | int i; | |
655 | ||
656 | IFCQ_CONVERT_LOCK(fqs->fqs_ifq); | |
657 | for (i = 0; i < FQ_IF_MAX_CLASSES; i++) { | |
658 | fq_if_purge_classq(fqs, &fqs->fqs_classq[i]); | |
659 | } | |
660 | ||
661 | VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist)); | |
662 | ||
663 | fqs->fqs_large_flow = NULL; | |
664 | for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) { | |
665 | VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i])); | |
666 | } | |
667 | ||
668 | bzero(&fqs->fqs_bitmaps, sizeof (fqs->fqs_bitmaps)); | |
669 | ||
670 | IFCQ_LEN(fqs->fqs_ifq) = 0; | |
671 | IFCQ_BYTES(fqs->fqs_ifq) = 0; | |
672 | } | |
673 | ||
674 | static void | |
675 | fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req) | |
676 | { | |
677 | fq_t *fq; | |
678 | ||
679 | IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq); | |
680 | req->packets = req->bytes = 0; | |
681 | VERIFY(req->flow != 0); | |
682 | ||
5ba3f43e A |
683 | /* packet type is needed only if we want to create a flow queue */ |
684 | fq = fq_if_hash_pkt(fqs, req->flow, req->sc, 0, FALSE, QP_INVALID); | |
39037602 A |
685 | |
686 | if (fq != NULL) | |
687 | fq_if_purge_flow(fqs, fq, &req->packets, &req->bytes); | |
688 | } | |
689 | ||
690 | static void | |
691 | fq_if_event(fq_if_t *fqs, cqev_t ev) | |
692 | { | |
693 | IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq); | |
694 | ||
695 | switch (ev) { | |
696 | case CLASSQ_EV_LINK_UP: | |
697 | case CLASSQ_EV_LINK_DOWN: | |
698 | fq_if_purge(fqs); | |
699 | break; | |
700 | default: | |
701 | break; | |
702 | } | |
703 | } | |
704 | ||
705 | static void | |
706 | fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl) | |
707 | { | |
708 | fq_if_purge_classq(fqs, fq_cl); | |
709 | fqs->fqs_throttle = 1; | |
710 | fq_cl->fcl_stat.fcl_throttle_on++; | |
711 | } | |
712 | ||
713 | static void | |
714 | fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl) | |
715 | { | |
716 | VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl)); | |
717 | fqs->fqs_throttle = 0; | |
718 | fq_cl->fcl_stat.fcl_throttle_off++; | |
719 | } | |
720 | ||
721 | ||
722 | static int | |
723 | fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr) | |
724 | { | |
725 | struct ifclassq *ifq = fqs->fqs_ifq; | |
726 | int index; | |
5ba3f43e A |
727 | #if !MACH_ASSERT |
728 | #pragma unused(ifq) | |
729 | #endif | |
39037602 A |
730 | IFCQ_LOCK_ASSERT_HELD(ifq); |
731 | ||
732 | if (!tr->set) { | |
733 | tr->level = fqs->fqs_throttle; | |
734 | return (0); | |
735 | } | |
736 | ||
737 | if (tr->level == fqs->fqs_throttle) | |
738 | return (EALREADY); | |
739 | ||
740 | /* Throttling is allowed on BK_SYS class only */ | |
5ba3f43e | 741 | index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS); |
39037602 A |
742 | switch (tr->level) { |
743 | case IFNET_THROTTLE_OFF: | |
744 | fq_if_classq_resume(fqs, &fqs->fqs_classq[index]); | |
745 | break; | |
746 | case IFNET_THROTTLE_OPPORTUNISTIC: | |
747 | fq_if_classq_suspend(fqs, &fqs->fqs_classq[index]); | |
748 | break; | |
749 | default: | |
750 | break; | |
751 | } | |
752 | return (0); | |
753 | } | |
754 | ||
755 | void | |
756 | fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat) | |
757 | { | |
758 | u_int32_t pri; | |
759 | fq_if_classq_t *fq_cl; | |
760 | ||
761 | if (stat == NULL) | |
762 | return; | |
763 | ||
5ba3f43e | 764 | pri = fq_if_service_to_priority(fqs, stat->sc); |
39037602 A |
765 | fq_cl = &fqs->fqs_classq[pri]; |
766 | stat->packets = fq_cl->fcl_stat.fcl_pkt_cnt; | |
767 | stat->bytes = fq_cl->fcl_stat.fcl_byte_cnt; | |
768 | } | |
769 | ||
770 | int | |
771 | fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg) | |
772 | { | |
773 | int err = 0; | |
774 | fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc; | |
775 | ||
776 | IFCQ_LOCK_ASSERT_HELD(ifq); | |
777 | ||
778 | /* | |
779 | * These are usually slow operations, convert the lock ahead of time | |
780 | */ | |
781 | IFCQ_CONVERT_LOCK(fqs->fqs_ifq); | |
782 | switch (rq) { | |
783 | case CLASSQRQ_PURGE: | |
784 | fq_if_purge(fqs); | |
785 | break; | |
786 | case CLASSQRQ_PURGE_SC: | |
787 | fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg); | |
788 | break; | |
789 | case CLASSQRQ_EVENT: | |
790 | fq_if_event(fqs, (cqev_t)arg); | |
791 | break; | |
792 | case CLASSQRQ_THROTTLE: | |
793 | fq_if_throttle(fqs, (cqrq_throttle_t *)arg); | |
794 | break; | |
795 | case CLASSQRQ_STAT_SC: | |
796 | fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg); | |
797 | break; | |
798 | } | |
799 | return (err); | |
800 | } | |
801 | ||
802 | int | |
5ba3f43e A |
803 | fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags, |
804 | classq_pkt_type_t ptype) | |
39037602 A |
805 | { |
806 | #pragma unused(flags) | |
807 | struct ifnet *ifp = ifq->ifcq_ifp; | |
808 | fq_if_t *fqs = NULL; | |
809 | int err = 0; | |
810 | ||
811 | IFCQ_LOCK_ASSERT_HELD(ifq); | |
812 | VERIFY(ifq->ifcq_disc == NULL); | |
813 | VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE); | |
814 | ||
5ba3f43e | 815 | fqs = fq_if_alloc(ifp, ptype); |
39037602 A |
816 | if (fqs == NULL) |
817 | return (ENOMEM); | |
818 | ||
5ba3f43e A |
819 | if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) { |
820 | fqs->fqs_flags |= FQS_DRIVER_MANAGED; | |
821 | fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500, | |
822 | 2, MBUF_SC_BK); | |
823 | fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500, | |
824 | 4, MBUF_SC_BE); | |
825 | fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000, | |
826 | 6, MBUF_SC_VI); | |
827 | fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600, | |
828 | 8, MBUF_SC_VO); | |
829 | } else { | |
830 | fq_if_classq_init(fqs, FQ_IF_BK_SYS_INDEX, 1500, | |
831 | 2, MBUF_SC_BK_SYS); | |
832 | fq_if_classq_init(fqs, FQ_IF_BK_INDEX, 1500, | |
833 | 2, MBUF_SC_BK); | |
834 | fq_if_classq_init(fqs, FQ_IF_BE_INDEX, 1500, | |
835 | 4, MBUF_SC_BE); | |
836 | fq_if_classq_init(fqs, FQ_IF_RD_INDEX, 1500, | |
837 | 4, MBUF_SC_RD); | |
838 | fq_if_classq_init(fqs, FQ_IF_OAM_INDEX, 1500, | |
839 | 4, MBUF_SC_OAM); | |
840 | fq_if_classq_init(fqs, FQ_IF_AV_INDEX, 3000, | |
841 | 6, MBUF_SC_AV); | |
842 | fq_if_classq_init(fqs, FQ_IF_RV_INDEX, 3000, | |
843 | 6, MBUF_SC_RV); | |
844 | fq_if_classq_init(fqs, FQ_IF_VI_INDEX, 3000, | |
845 | 6, MBUF_SC_VI); | |
846 | fq_if_classq_init(fqs, FQ_IF_VO_INDEX, 600, | |
847 | 8, MBUF_SC_VO); | |
848 | fq_if_classq_init(fqs, FQ_IF_CTL_INDEX, 600, | |
849 | 8, MBUF_SC_CTL); | |
850 | } | |
39037602 A |
851 | |
852 | err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs, | |
5ba3f43e A |
853 | fq_if_enqueue_classq, fq_if_dequeue_classq, |
854 | fq_if_dequeue_sc_classq, fq_if_dequeue_classq_multi, | |
855 | fq_if_dequeue_sc_classq_multi, fq_if_request_classq); | |
39037602 A |
856 | |
857 | if (err != 0) { | |
858 | printf("%s: error from ifclassq_attach, " | |
859 | "failed to attach fq_if: %d\n", __func__, err); | |
860 | fq_if_destroy(fqs); | |
861 | } | |
862 | return (err); | |
863 | } | |
864 | ||
865 | fq_t * | |
866 | fq_if_hash_pkt(fq_if_t *fqs, u_int32_t flowid, mbuf_svc_class_t svc_class, | |
5ba3f43e | 867 | u_int64_t now, boolean_t create, classq_pkt_type_t ptype) |
39037602 A |
868 | { |
869 | fq_t *fq = NULL; | |
870 | flowq_list_t *fq_list; | |
871 | fq_if_classq_t *fq_cl; | |
872 | u_int8_t fqs_hash_id; | |
873 | u_int8_t scidx; | |
874 | ||
5ba3f43e | 875 | scidx = fq_if_service_to_priority(fqs, svc_class); |
39037602 A |
876 | |
877 | fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid); | |
878 | ||
879 | fq_list = &fqs->fqs_flows[fqs_hash_id]; | |
880 | ||
881 | SLIST_FOREACH(fq, fq_list, fq_hashlink) { | |
882 | if (fq->fq_flowhash == flowid && | |
883 | fq->fq_sc_index == scidx) | |
884 | break; | |
885 | } | |
886 | if (fq == NULL && create == TRUE) { | |
5ba3f43e A |
887 | ASSERT(ptype == QP_MBUF); |
888 | ||
39037602 A |
889 | /* If the flow is not already on the list, allocate it */ |
890 | IFCQ_CONVERT_LOCK(fqs->fqs_ifq); | |
5ba3f43e | 891 | fq = fq_alloc(ptype); |
39037602 A |
892 | if (fq != NULL) { |
893 | fq->fq_flowhash = flowid; | |
894 | fq->fq_sc_index = scidx; | |
895 | fq->fq_updatetime = now + fqs->fqs_update_interval; | |
896 | fq_cl = &fqs->fqs_classq[scidx]; | |
39037602 A |
897 | fq->fq_flags = FQF_FLOWCTL_CAPABLE; |
898 | SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink); | |
899 | fq_cl->fcl_stat.fcl_flows_cnt++; | |
900 | } | |
901 | } | |
902 | ||
903 | /* | |
904 | * If getq time is not set because this is the first packet or after | |
905 | * idle time, set it now so that we can detect a stall. | |
906 | */ | |
5ba3f43e | 907 | if (fq != NULL && fq->fq_getqtime == 0) |
39037602 A |
908 | fq->fq_getqtime = now; |
909 | ||
910 | return (fq); | |
911 | } | |
912 | ||
5ba3f43e | 913 | void |
39037602 A |
914 | fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq) |
915 | { | |
916 | u_int8_t hash_id; | |
917 | hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash); | |
918 | SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq, | |
919 | fq_hashlink); | |
920 | fq_cl->fcl_stat.fcl_flows_cnt--; | |
921 | IFCQ_CONVERT_LOCK(fqs->fqs_ifq); | |
922 | fq_destroy(fq); | |
923 | ||
924 | } | |
925 | ||
926 | inline boolean_t | |
927 | fq_if_at_drop_limit(fq_if_t *fqs) | |
928 | { | |
929 | return (((IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ? | |
930 | TRUE : FALSE)); | |
931 | } | |
932 | ||
933 | static void | |
934 | fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq, | |
935 | bool remove_hash) | |
936 | { | |
937 | /* | |
938 | * Remove the flow queue if it is empty | |
939 | * and delete it | |
940 | */ | |
941 | STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq, | |
942 | fq_actlink); | |
943 | fq->fq_flags &= ~FQF_OLD_FLOW; | |
944 | fq_cl->fcl_stat.fcl_oldflows_cnt--; | |
945 | VERIFY(fq->fq_bytes == 0); | |
946 | ||
947 | if (remove_hash) { | |
948 | /* Remove from the hash list */ | |
949 | fq_if_destroy_flow(fqs, fq_cl, fq); | |
950 | } | |
951 | } | |
952 | ||
953 | static void | |
954 | fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl, bool add_to_old) | |
955 | { | |
956 | /* Move to the end of old queue list */ | |
957 | STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq, | |
958 | flowq, fq_actlink); | |
959 | fq->fq_flags &= ~FQF_NEW_FLOW; | |
960 | fq_cl->fcl_stat.fcl_newflows_cnt--; | |
961 | ||
962 | if (add_to_old) { | |
963 | STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq, | |
964 | fq_actlink); | |
965 | fq->fq_flags |= FQF_OLD_FLOW; | |
966 | fq_cl->fcl_stat.fcl_oldflows_cnt++; | |
967 | } | |
968 | } | |
969 | ||
970 | inline void | |
971 | fq_if_drop_packet(fq_if_t *fqs) | |
972 | { | |
973 | fq_t *fq = fqs->fqs_large_flow; | |
39037602 | 974 | fq_if_classq_t *fq_cl; |
5ba3f43e A |
975 | pktsched_pkt_t pkt; |
976 | uint32_t *pkt_flags; | |
977 | uint64_t *pkt_timestamp; | |
39037602 A |
978 | |
979 | if (fq == NULL) | |
980 | return; | |
5ba3f43e A |
981 | /* queue can not be empty on the largest flow */ |
982 | VERIFY(!fq_empty(fq)); | |
39037602 A |
983 | |
984 | fq_cl = &fqs->fqs_classq[fq->fq_sc_index]; | |
5ba3f43e A |
985 | _PKTSCHED_PKT_INIT(&pkt); |
986 | (void)fq_getq_flow_internal(fqs, fq, &pkt); | |
39037602 | 987 | |
5ba3f43e A |
988 | pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL, |
989 | NULL, NULL); | |
39037602 A |
990 | |
991 | IFCQ_CONVERT_LOCK(fqs->fqs_ifq); | |
5ba3f43e A |
992 | *pkt_timestamp = 0; |
993 | if (pkt.pktsched_ptype == QP_MBUF) | |
994 | *pkt_flags &= ~PKTF_PRIV_GUARDED; | |
995 | ||
996 | if (fq_empty(fq)) { | |
997 | fqs->fqs_large_flow = NULL; | |
39037602 A |
998 | if (fq->fq_flags & FQF_OLD_FLOW) { |
999 | fq_if_empty_old_flow(fqs, fq_cl, fq, true); | |
1000 | } else { | |
1001 | VERIFY(fq->fq_flags & FQF_NEW_FLOW); | |
1002 | fq_if_empty_new_flow(fq, fq_cl, true); | |
1003 | } | |
1004 | } | |
5ba3f43e | 1005 | IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt)); |
39037602 | 1006 | |
5ba3f43e | 1007 | pktsched_free_pkt(&pkt); |
39037602 A |
1008 | fq_cl->fcl_stat.fcl_drop_overflow++; |
1009 | } | |
1010 | ||
1011 | inline void | |
1012 | fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq) | |
1013 | { | |
5ba3f43e A |
1014 | fq_t *prev_fq; |
1015 | ||
1016 | if (fqs->fqs_large_flow != NULL && | |
1017 | fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) | |
1018 | fqs->fqs_large_flow = NULL; | |
1019 | ||
1020 | if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) | |
1021 | return; | |
1022 | ||
1023 | prev_fq = fqs->fqs_large_flow; | |
1024 | if (prev_fq == NULL) { | |
1025 | if (!fq_empty(fq)) | |
1026 | fqs->fqs_large_flow = fq; | |
39037602 A |
1027 | return; |
1028 | } else if (fq->fq_bytes > prev_fq->fq_bytes) { | |
1029 | fqs->fqs_large_flow = fq; | |
1030 | } | |
1031 | } | |
1032 | ||
1033 | boolean_t | |
5ba3f43e A |
1034 | fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint32_t flowid, |
1035 | uint8_t flowsrc, fq_if_classq_t *fq_cl) | |
39037602 A |
1036 | { |
1037 | struct flowadv_fcentry *fce; | |
39037602 A |
1038 | |
1039 | STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) { | |
5ba3f43e | 1040 | if ((uint8_t)fce->fce_flowsrc_type == flowsrc && |
39037602 A |
1041 | fce->fce_flowid == flowid) { |
1042 | /* Already on flowcontrol list */ | |
1043 | return (TRUE); | |
1044 | } | |
1045 | } | |
39037602 | 1046 | IFCQ_CONVERT_LOCK(fqs->fqs_ifq); |
5ba3f43e | 1047 | fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK); |
39037602 | 1048 | if (fce != NULL) { |
39037602 A |
1049 | /* XXX Add number of bytes in the queue */ |
1050 | STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link); | |
1051 | fq_cl->fcl_stat.fcl_flow_control++; | |
1052 | } | |
1053 | return ((fce != NULL) ? TRUE : FALSE); | |
1054 | } | |
1055 | ||
1056 | void | |
1057 | fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl) | |
1058 | { | |
1059 | struct flowadv_fcentry *fce = NULL; | |
1060 | ||
1061 | IFCQ_CONVERT_LOCK(fqs->fqs_ifq); | |
1062 | STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) { | |
1063 | if (fce->fce_flowid == fq->fq_flowhash) | |
1064 | break; | |
1065 | } | |
1066 | if (fce != NULL) { | |
1067 | STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry, | |
1068 | fce_link); | |
1069 | STAILQ_NEXT(fce, fce_link) = NULL; | |
1070 | flowadv_add_entry(fce); | |
1071 | fq_cl->fcl_stat.fcl_flow_feedback++; | |
1072 | } | |
1073 | fq->fq_flags &= ~FQF_FLOWCTL_ON; | |
1074 | } | |
1075 | ||
1076 | void | |
1077 | fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, u_int32_t pktlimit, | |
5ba3f43e A |
1078 | u_int32_t bytelimit, void **top, void **tail, |
1079 | u_int32_t *retpktcnt, u_int32_t *retbytecnt, boolean_t drvmgmt, | |
1080 | classq_pkt_type_t *ptype) | |
39037602 A |
1081 | { |
1082 | fq_t *fq = NULL, *tfq = NULL; | |
39037602 | 1083 | flowq_stailq_t temp_stailq; |
5ba3f43e A |
1084 | u_int32_t pktcnt, bytecnt; |
1085 | boolean_t qempty, limit_reached = FALSE; | |
1086 | void *last = NULL; | |
1087 | fq_getq_flow_t fq_getq_flow_fn; | |
1088 | ||
1089 | switch (fqs->fqs_ptype) { | |
1090 | case QP_MBUF: | |
1091 | fq_getq_flow_fn = fq_getq_flow_mbuf; | |
1092 | break; | |
1093 | ||
1094 | ||
1095 | default: | |
1096 | VERIFY(0); | |
1097 | /* NOTREACHED */ | |
1098 | } | |
39037602 A |
1099 | |
1100 | /* | |
1101 | * maximum byte limit should not be greater than the budget for | |
1102 | * this class | |
1103 | */ | |
5ba3f43e | 1104 | if ((int32_t)bytelimit > fq_cl->fcl_budget && !drvmgmt) |
39037602 A |
1105 | bytelimit = fq_cl->fcl_budget; |
1106 | ||
1107 | VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL); | |
1108 | ||
1109 | *top = NULL; | |
5ba3f43e | 1110 | *ptype = fqs->fqs_ptype; |
39037602 A |
1111 | pktcnt = bytecnt = 0; |
1112 | STAILQ_INIT(&temp_stailq); | |
1113 | ||
1114 | STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) { | |
5ba3f43e | 1115 | ASSERT((fq->fq_flags & (FQF_NEW_FLOW|FQF_OLD_FLOW)) == |
39037602 | 1116 | FQF_NEW_FLOW); |
39037602 | 1117 | |
5ba3f43e A |
1118 | limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit, |
1119 | pktlimit, top, &last, &bytecnt, &pktcnt, &qempty, | |
1120 | PKTF_NEW_FLOW); | |
39037602 | 1121 | |
5ba3f43e | 1122 | if (fq->fq_deficit <= 0 || qempty) |
39037602 | 1123 | fq_if_empty_new_flow(fq, fq_cl, true); |
5ba3f43e A |
1124 | fq->fq_deficit += fq_cl->fcl_quantum; |
1125 | if (limit_reached) | |
39037602 A |
1126 | goto done; |
1127 | } | |
1128 | ||
1129 | STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) { | |
1130 | VERIFY((fq->fq_flags & (FQF_NEW_FLOW|FQF_OLD_FLOW)) == | |
1131 | FQF_OLD_FLOW); | |
39037602 | 1132 | |
5ba3f43e A |
1133 | limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit, |
1134 | pktlimit, top, &last, &bytecnt, &pktcnt, &qempty, 0); | |
39037602 | 1135 | |
5ba3f43e | 1136 | if (qempty) { |
39037602 A |
1137 | fq_if_empty_old_flow(fqs, fq_cl, fq, true); |
1138 | } else if (fq->fq_deficit <= 0) { | |
1139 | STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, | |
1140 | flowq, fq_actlink); | |
1141 | /* | |
1142 | * Move to the end of the old queues list. We do not | |
1143 | * need to update the flow count since this flow | |
1144 | * will be added to the tail again | |
1145 | */ | |
1146 | STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink); | |
1147 | fq->fq_deficit += fq_cl->fcl_quantum; | |
1148 | } | |
5ba3f43e | 1149 | if (limit_reached) |
39037602 A |
1150 | break; |
1151 | } | |
1152 | ||
1153 | done: | |
1154 | if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) { | |
1155 | STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq); | |
1156 | } else if (!STAILQ_EMPTY(&temp_stailq)) { | |
1157 | fq_cl->fcl_old_flows = temp_stailq; | |
1158 | } | |
1159 | ||
1160 | if (last != NULL) { | |
1161 | VERIFY(*top != NULL); | |
1162 | if (tail != NULL) | |
1163 | *tail = last; | |
1164 | if (retpktcnt != NULL) | |
1165 | *retpktcnt = pktcnt; | |
1166 | if (retbytecnt != NULL) | |
1167 | *retbytecnt = bytecnt; | |
1168 | } | |
1169 | } | |
1170 | ||
1171 | int | |
1172 | fq_if_teardown_ifclassq(struct ifclassq *ifq) | |
1173 | { | |
1174 | fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc; | |
1175 | ||
1176 | IFCQ_LOCK_ASSERT_HELD(ifq); | |
1177 | VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL); | |
1178 | ||
1179 | fq_if_destroy(fqs); | |
1180 | ifq->ifcq_disc = NULL; | |
39037602 A |
1181 | return (ifclassq_detach(ifq)); |
1182 | } | |
1183 | ||
5ba3f43e A |
1184 | static void |
1185 | fq_export_flowstats(fq_if_t *fqs, fq_t *fq, | |
1186 | struct fq_codel_flowstats *flowstat) | |
1187 | { | |
1188 | bzero(flowstat, sizeof (*flowstat)); | |
1189 | flowstat->fqst_min_qdelay = fq->fq_min_qdelay; | |
1190 | flowstat->fqst_bytes = fq->fq_bytes; | |
1191 | flowstat->fqst_flowhash = fq->fq_flowhash; | |
1192 | if (fq->fq_flags & FQF_NEW_FLOW) | |
1193 | flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW; | |
1194 | if (fq->fq_flags & FQF_OLD_FLOW) | |
1195 | flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW; | |
1196 | if (fq->fq_flags & FQF_DELAY_HIGH) | |
1197 | flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH; | |
1198 | if (fq->fq_flags & FQF_FLOWCTL_ON) | |
1199 | flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON; | |
1200 | if (fqs->fqs_large_flow == fq) | |
1201 | flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW; | |
1202 | } | |
1203 | ||
39037602 A |
1204 | int |
1205 | fq_if_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t qid, | |
1206 | struct if_ifclassq_stats *ifqs) | |
1207 | { | |
1208 | struct fq_codel_classstats *fcls; | |
1209 | fq_if_classq_t *fq_cl; | |
1210 | fq_if_t *fqs; | |
5ba3f43e A |
1211 | fq_t *fq = NULL; |
1212 | u_int32_t i, flowstat_cnt; | |
39037602 A |
1213 | |
1214 | if (qid >= FQ_IF_MAX_CLASSES) | |
1215 | return (EINVAL); | |
1216 | ||
1217 | fqs = (fq_if_t *)ifq->ifcq_disc; | |
1218 | fcls = &ifqs->ifqs_fq_codel_stats; | |
1219 | ||
1220 | fq_cl = &fqs->fqs_classq[qid]; | |
1221 | ||
1222 | fcls->fcls_pri = fq_cl->fcl_pri; | |
1223 | fcls->fcls_service_class = fq_cl->fcl_service_class; | |
1224 | fcls->fcls_quantum = fq_cl->fcl_quantum; | |
1225 | fcls->fcls_drr_max = fq_cl->fcl_drr_max; | |
1226 | fcls->fcls_budget = fq_cl->fcl_budget; | |
1227 | fcls->fcls_target_qdelay = fqs->fqs_target_qdelay; | |
1228 | fcls->fcls_update_interval = fqs->fqs_update_interval; | |
1229 | fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control; | |
1230 | fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback; | |
1231 | fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall; | |
1232 | fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow; | |
1233 | fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early; | |
1234 | fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure; | |
1235 | fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt; | |
1236 | fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt; | |
1237 | fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt; | |
1238 | fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt; | |
1239 | fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail; | |
1240 | fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail; | |
1241 | fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue; | |
1242 | fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes; | |
1243 | fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt; | |
1244 | fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on; | |
1245 | fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off; | |
1246 | fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops; | |
1247 | fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts; | |
1248 | ||
5ba3f43e A |
1249 | /* Gather per flow stats */ |
1250 | flowstat_cnt = min((fcls->fcls_newflows_cnt + | |
1251 | fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS); | |
1252 | i = 0; | |
1253 | STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) { | |
1254 | if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt) | |
1255 | break; | |
1256 | ||
1257 | /* leave space for a few old flows */ | |
1258 | if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt && | |
1259 | i >= (FQ_IF_MAX_FLOWSTATS >> 1)) | |
1260 | break; | |
1261 | fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]); | |
1262 | i++; | |
1263 | } | |
1264 | STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) { | |
1265 | if (i >= flowstat_cnt) | |
1266 | break; | |
1267 | fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]); | |
1268 | i++; | |
1269 | } | |
1270 | VERIFY(i <= flowstat_cnt); | |
1271 | fcls->fcls_flowstats_cnt = i; | |
39037602 A |
1272 | return (0); |
1273 | } |