]> git.saurik.com Git - apple/xnu.git/blame - bsd/net/classq/classq_sfb.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / net / classq / classq_sfb.c
CommitLineData
316670eb 1/*
f427ee49 2 * Copyright (c) 2011-2020 Apple Inc. All rights reserved.
316670eb
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/cdefs.h>
30#include <sys/param.h>
316670eb
A
31#include <sys/mbuf.h>
32#include <sys/socket.h>
33#include <sys/sockio.h>
34#include <sys/systm.h>
35#include <sys/sysctl.h>
36#include <sys/syslog.h>
37#include <sys/proc.h>
38#include <sys/errno.h>
39#include <sys/kernel.h>
40#include <sys/kauth.h>
41
42#include <kern/zalloc.h>
43
44#include <net/if.h>
45#include <net/if_var.h>
46#include <net/if_types.h>
47#include <net/dlil.h>
39236c6e 48#include <net/flowadv.h>
316670eb
A
49
50#include <netinet/in.h>
51#include <netinet/in_systm.h>
52#include <netinet/ip.h>
316670eb 53#include <netinet/ip6.h>
316670eb
A
54
55#include <net/classq/classq_sfb.h>
56#include <net/flowhash.h>
57#include <net/net_osdep.h>
39236c6e 58#include <dev/random/randomdev.h>
316670eb
A
59
60/*
61 * Stochastic Fair Blue
62 *
63 * Wu-chang Feng, Dilip D. Kandlur, Debanjan Saha, Kang G. Shin
64 * http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf
65 *
66 * Based on the NS code with the following parameters:
67 *
68 * bytes: false
69 * decrement: 0.001
70 * increment: 0.005
71 * hold-time: 10ms-50ms (randomized)
72 * algorithm: 0
73 * pbox: 1
74 * pbox-time: 50-100ms (randomized)
75 * hinterval: 11-23 (randomized)
76 *
77 * This implementation uses L = 2 and N = 32 for 2 sets of:
78 *
79 * B[L][N]: L x N array of bins (L levels, N bins per level)
80 *
81 * Each set effectively creates 32^2 virtual buckets (bin combinations)
82 * while using only O(32*2) states.
83 *
84 * Given a 32-bit hash value, we divide it such that octets [0,1,2,3] are
85 * used as index for the bins across the 2 levels, where level 1 uses [0,2]
86 * and level 2 uses [1,3]. The 2 values per level correspond to the indices
87 * for the current and warm-up sets (section 4.4. in the SFB paper regarding
88 * Moving Hash Functions explains the purposes of these 2 sets.)
89 */
90
91/*
92 * Use Murmur3A_x86_32 for hash function. It seems to perform consistently
93 * across platforms for 1-word key (32-bit flowhash value). See flowhash.h
94 * for other alternatives. We only need 16-bit hash output.
95 */
0a7de745
A
96#define SFB_HASH net_flowhash_mh3_x86_32
97#define SFB_HASHMASK HASHMASK(16)
316670eb 98
0a7de745 99#define SFB_BINMASK(_x) \
316670eb
A
100 ((_x) & HASHMASK(SFB_BINS_SHIFT))
101
0a7de745 102#define SFB_BINST(_sp, _l, _n, _c) \
316670eb
A
103 (&(*(_sp)->sfb_bins)[_c].stats[_l][_n])
104
0a7de745 105#define SFB_BINFT(_sp, _l, _n, _c) \
316670eb
A
106 (&(*(_sp)->sfb_bins)[_c].freezetime[_l][_n])
107
0a7de745 108#define SFB_FC_LIST(_sp, _n) \
316670eb
A
109 (&(*(_sp)->sfb_fc_lists)[_n])
110
111/*
112 * The holdtime parameter determines the minimum time interval between
113 * two successive updates of the marking probability. In the event the
114 * uplink speed is not known, a default value is chosen and is randomized
115 * to be within the following range.
116 */
0a7de745
A
117#define HOLDTIME_BASE (100ULL * 1000 * 1000) /* 100ms */
118#define HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10ms */
119#define HOLDTIME_MAX (100ULL * 1000 * 1000) /* 100ms */
316670eb
A
120
121/*
122 * The pboxtime parameter determines the bandwidth allocated for rogue
123 * flows, i.e. the rate limiting bandwidth. In the event the uplink speed
124 * is not known, a default value is chosen and is randomized to be within
125 * the following range.
126 */
0a7de745
A
127#define PBOXTIME_BASE (300ULL * 1000 * 1000) /* 300ms */
128#define PBOXTIME_MIN (30ULL * 1000 * 1000) /* 30ms */
129#define PBOXTIME_MAX (300ULL * 1000 * 1000) /* 300ms */
316670eb 130
fe8ab488
A
131/*
132 * Target queueing delay is the amount of extra delay that can be added
133 * to accommodate variations in the link bandwidth. The queue should be
134 * large enough to induce this much delay and nothing more than that.
135 */
0a7de745
A
136#define TARGET_QDELAY_BASE (10ULL * 1000 * 1000) /* 10ms */
137#define TARGET_QDELAY_MIN (10ULL * 1000) /* 10us */
138#define TARGET_QDELAY_MAX (20ULL * 1000 * 1000 * 1000) /* 20s */
fe8ab488
A
139
140/*
141 * Update interval for checking the extra delay added by the queue. This
39037602 142 * should be 90-95 percentile of RTT experienced by any TCP connection
fe8ab488
A
143 * so that it will take care of the burst traffic.
144 */
0a7de745
A
145#define UPDATE_INTERVAL_BASE (100ULL * 1000 * 1000) /* 100ms */
146#define UPDATE_INTERVAL_MIN (100ULL * 1000 * 1000) /* 100ms */
147#define UPDATE_INTERVAL_MAX (10ULL * 1000 * 1000 * 1000) /* 10s */
fe8ab488 148
0a7de745 149#define SFB_RANDOM(sp, tmin, tmax) ((sfb_random(sp) % (tmax)) + (tmin))
316670eb 150
0a7de745 151#define SFB_PKT_PBOX 0x1 /* in penalty box */
316670eb
A
152
153/* The following mantissa values are in SFB_FP_SHIFT Q format */
0a7de745 154#define SFB_MAX_PMARK (1 << SFB_FP_SHIFT) /* Q14 representation of 1.00 */
316670eb
A
155
156/*
157 * These are d1 (increment) and d2 (decrement) parameters, used to determine
158 * the amount by which the marking probability is incremented when the queue
159 * overflows, or is decremented when the link is idle. d1 is set higher than
160 * d2, because link underutilization can occur when congestion management is
161 * either too conservative or too aggressive, but packet loss occurs only
162 * when congestion management is too conservative. By weighing heavily
163 * against packet loss, it can quickly reach to a substantial increase in
164 * traffic load.
165 */
0a7de745
A
166#define SFB_INCREMENT 82 /* Q14 representation of 0.005 */
167#define SFB_DECREMENT 16 /* Q14 representation of 0.001 */
316670eb 168
0a7de745
A
169#define SFB_PMARK_TH 16056 /* Q14 representation of 0.98 */
170#define SFB_PMARK_WARM 3276 /* Q14 representation of 0.2 */
316670eb 171
0a7de745
A
172#define SFB_PMARK_INC(_bin) do { \
173 (_bin)->pmark += sfb_increment; \
174 if ((_bin)->pmark > SFB_MAX_PMARK) \
175 (_bin)->pmark = SFB_MAX_PMARK; \
316670eb
A
176} while (0)
177
0a7de745
A
178#define SFB_PMARK_DEC(_bin) do { \
179 if ((_bin)->pmark > 0) { \
180 (_bin)->pmark -= sfb_decrement; \
181 if ((_bin)->pmark < 0) \
182 (_bin)->pmark = 0; \
183 } \
316670eb
A
184} while (0)
185
fe8ab488 186/* Minimum nuber of bytes in queue to get flow controlled */
0a7de745 187#define SFB_MIN_FC_THRESHOLD_BYTES 7500
fe8ab488 188
0a7de745
A
189#define SFB_SET_DELAY_HIGH(_sp_, _q_) do { \
190 (_sp_)->sfb_flags |= SFBF_DELAYHIGH; \
f427ee49
A
191 (_sp_)->sfb_fc_threshold = ulmax(SFB_MIN_FC_THRESHOLD_BYTES, \
192 (uint32_t)(qsize((_q_)) >> 3)); \
fe8ab488
A
193} while (0)
194
0a7de745
A
195#define SFB_QUEUE_DELAYBASED(_sp_) ((_sp_)->sfb_flags & SFBF_DELAYBASED)
196#define SFB_IS_DELAYHIGH(_sp_) ((_sp_)->sfb_flags & SFBF_DELAYHIGH)
197#define SFB_QUEUE_DELAYBASED_MAXSIZE 2048 /* max pkts */
fe8ab488 198
0a7de745
A
199#define HINTERVAL_MIN (10) /* 10 seconds */
200#define HINTERVAL_MAX (20) /* 20 seconds */
201#define SFB_HINTERVAL(sp) ((sfb_random(sp) % HINTERVAL_MAX) + HINTERVAL_MIN)
316670eb 202
0a7de745
A
203#define DEQUEUE_DECAY 7 /* ilog2 of EWMA decay rate, (128) */
204#define DEQUEUE_SPIKE(_new, _old) \
316670eb
A
205 ((u_int64_t)ABS((int64_t)(_new) - (int64_t)(_old)) > ((_old) << 11))
206
316670eb 207/* Place the flow control entries in current bin on level 0 */
0a7de745 208#define SFB_FC_LEVEL 0
316670eb 209
f427ee49
A
210static ZONE_DECLARE(sfb_zone, "classq_sfb",
211 sizeof(struct sfb), ZC_ZFREE_CLEARMEM);
316670eb 212
f427ee49
A
213static ZONE_DECLARE(sfb_bins_zone, "classq_sfb_bins",
214 sizeof(struct sfb_bins), ZC_ZFREE_CLEARMEM);
39236c6e 215
f427ee49
A
216static ZONE_DECLARE(sfb_fcl_zone, "classq_sfb_fcl",
217 sizeof(struct sfb_fcl), ZC_ZFREE_CLEARMEM);
39236c6e 218
316670eb
A
219/* internal function prototypes */
220static u_int32_t sfb_random(struct sfb *);
5ba3f43e
A
221static void *sfb_getq_flow(struct sfb *, class_queue_t *, u_int32_t, boolean_t,
222 pktsched_pkt_t *);
316670eb
A
223static void sfb_resetq(struct sfb *, cqev_t);
224static void sfb_calc_holdtime(struct sfb *, u_int64_t);
225static void sfb_calc_pboxtime(struct sfb *, u_int64_t);
226static void sfb_calc_hinterval(struct sfb *, u_int64_t *);
fe8ab488 227static void sfb_calc_update_interval(struct sfb *, u_int64_t);
316670eb 228static void sfb_swap_bins(struct sfb *, u_int32_t);
5ba3f43e
A
229static inline int sfb_pcheck(struct sfb *, uint32_t);
230static int sfb_penalize(struct sfb *, uint32_t, uint32_t *, struct timespec *);
316670eb
A
231static void sfb_adjust_bin(struct sfb *, struct sfbbinstats *,
232 struct timespec *, struct timespec *, boolean_t);
233static void sfb_decrement_bin(struct sfb *, struct sfbbinstats *,
234 struct timespec *, struct timespec *);
235static void sfb_increment_bin(struct sfb *, struct sfbbinstats *,
236 struct timespec *, struct timespec *);
5ba3f43e 237static inline void sfb_dq_update_bins(struct sfb *, uint32_t, uint32_t,
f427ee49 238 struct timespec *, u_int64_t qsize);
5ba3f43e
A
239static inline void sfb_eq_update_bins(struct sfb *, uint32_t, uint32_t);
240static int sfb_drop_early(struct sfb *, uint32_t, u_int16_t *,
316670eb 241 struct timespec *);
5ba3f43e
A
242static boolean_t sfb_bin_addfcentry(struct sfb *, pktsched_pkt_t *,
243 uint32_t, uint8_t, uint32_t);
39236c6e 244static void sfb_fclist_append(struct sfb *, struct sfb_fcl *);
316670eb 245static void sfb_fclists_clean(struct sfb *sp);
fe8ab488
A
246static int sfb_bin_mark_or_drop(struct sfb *sp, struct sfbbinstats *bin);
247static void sfb_detect_dequeue_stall(struct sfb *sp, class_queue_t *,
248 struct timespec *);
316670eb 249
0a7de745 250SYSCTL_NODE(_net_classq, OID_AUTO, sfb, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "SFB");
316670eb 251
0a7de745
A
252static u_int64_t sfb_holdtime = 0; /* 0 indicates "automatic" */
253SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, holdtime, CTLFLAG_RW | CTLFLAG_LOCKED,
316670eb
A
254 &sfb_holdtime, "SFB freeze time in nanoseconds");
255
0a7de745
A
256static u_int64_t sfb_pboxtime = 0; /* 0 indicates "automatic" */
257SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, pboxtime, CTLFLAG_RW | CTLFLAG_LOCKED,
316670eb
A
258 &sfb_pboxtime, "SFB penalty box time in nanoseconds");
259
260static u_int64_t sfb_hinterval;
0a7de745 261SYSCTL_QUAD(_net_classq_sfb, OID_AUTO, hinterval, CTLFLAG_RW | CTLFLAG_LOCKED,
316670eb
A
262 &sfb_hinterval, "SFB hash interval in nanoseconds");
263
264static u_int32_t sfb_increment = SFB_INCREMENT;
0a7de745 265SYSCTL_UINT(_net_classq_sfb, OID_AUTO, increment, CTLFLAG_RW | CTLFLAG_LOCKED,
316670eb
A
266 &sfb_increment, SFB_INCREMENT, "SFB increment [d1]");
267
268static u_int32_t sfb_decrement = SFB_DECREMENT;
0a7de745 269SYSCTL_UINT(_net_classq_sfb, OID_AUTO, decrement, CTLFLAG_RW | CTLFLAG_LOCKED,
316670eb
A
270 &sfb_decrement, SFB_DECREMENT, "SFB decrement [d2]");
271
0a7de745
A
272static u_int32_t sfb_allocation = 0; /* 0 means "automatic" */
273SYSCTL_UINT(_net_classq_sfb, OID_AUTO, allocation, CTLFLAG_RW | CTLFLAG_LOCKED,
316670eb
A
274 &sfb_allocation, 0, "SFB bin allocation");
275
276static u_int32_t sfb_ratelimit = 0;
0a7de745
A
277SYSCTL_UINT(_net_classq_sfb, OID_AUTO, ratelimit, CTLFLAG_RW | CTLFLAG_LOCKED,
278 &sfb_ratelimit, 0, "SFB rate limit");
316670eb 279
0a7de745
A
280#define KBPS (1ULL * 1000) /* 1 Kbits per second */
281#define MBPS (1ULL * 1000 * 1000) /* 1 Mbits per second */
282#define GBPS (MBPS * 1000) /* 1 Gbits per second */
316670eb
A
283
284struct sfb_time_tbl {
0a7de745
A
285 u_int64_t speed; /* uplink speed */
286 u_int64_t holdtime; /* hold time */
287 u_int64_t pboxtime; /* penalty box time */
316670eb
A
288};
289
290static struct sfb_time_tbl sfb_ttbl[] = {
cb323159
A
291 { .speed = 1 * MBPS, .holdtime = HOLDTIME_BASE * 1000, .pboxtime = PBOXTIME_BASE * 1000},
292 { .speed = 10 * MBPS, .holdtime = HOLDTIME_BASE * 100, .pboxtime = PBOXTIME_BASE * 100 },
293 { .speed = 100 * MBPS, .holdtime = HOLDTIME_BASE * 10, .pboxtime = PBOXTIME_BASE * 10 },
294 { .speed = 1 * GBPS, .holdtime = HOLDTIME_BASE, .pboxtime = PBOXTIME_BASE },
295 { .speed = 10 * GBPS, .holdtime = HOLDTIME_BASE / 10, .pboxtime = PBOXTIME_BASE / 10 },
296 { .speed = 100 * GBPS, .holdtime = HOLDTIME_BASE / 100, .pboxtime = PBOXTIME_BASE / 100 },
297 { .speed = 0, .holdtime = 0, .pboxtime = 0 }
316670eb
A
298};
299
f427ee49
A
300static_assert(SFBF_ECN4 == CLASSQF_ECN4);
301static_assert(SFBF_ECN6 == CLASSQF_ECN6);
316670eb
A
302
303static u_int32_t
304sfb_random(struct sfb *sp)
305{
306 IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd);
0a7de745 307 return RandomULong();
316670eb
A
308}
309
310static void
311sfb_calc_holdtime(struct sfb *sp, u_int64_t outbw)
312{
313 u_int64_t holdtime;
314
315 if (sfb_holdtime != 0) {
316 holdtime = sfb_holdtime;
317 } else if (outbw == 0) {
318 holdtime = SFB_RANDOM(sp, HOLDTIME_MIN, HOLDTIME_MAX);
319 } else {
f427ee49 320 uint64_t n, i;
316670eb
A
321
322 n = sfb_ttbl[0].holdtime;
323 for (i = 0; sfb_ttbl[i].speed != 0; i++) {
0a7de745 324 if (outbw < sfb_ttbl[i].speed) {
316670eb 325 break;
0a7de745 326 }
316670eb
A
327 n = sfb_ttbl[i].holdtime;
328 }
329 holdtime = n;
330 }
331 net_nsectimer(&holdtime, &sp->sfb_holdtime);
332}
333
334static void
335sfb_calc_pboxtime(struct sfb *sp, u_int64_t outbw)
336{
337 u_int64_t pboxtime;
338
339 if (sfb_pboxtime != 0) {
340 pboxtime = sfb_pboxtime;
341 } else if (outbw == 0) {
342 pboxtime = SFB_RANDOM(sp, PBOXTIME_MIN, PBOXTIME_MAX);
343 } else {
f427ee49 344 uint64_t n, i;
316670eb
A
345
346 n = sfb_ttbl[0].pboxtime;
347 for (i = 0; sfb_ttbl[i].speed != 0; i++) {
0a7de745 348 if (outbw < sfb_ttbl[i].speed) {
316670eb 349 break;
0a7de745 350 }
316670eb
A
351 n = sfb_ttbl[i].pboxtime;
352 }
353 pboxtime = n;
354 }
355 net_nsectimer(&pboxtime, &sp->sfb_pboxtime);
356 net_timerclear(&sp->sfb_pboxfreeze);
357}
358
359static void
360sfb_calc_hinterval(struct sfb *sp, u_int64_t *t)
361{
5ba3f43e 362 u_int64_t hinterval = 0;
316670eb
A
363 struct timespec now;
364
365 if (t != NULL) {
366 /*
367 * TODO adi@apple.com: use dq_avg to derive hinterval.
368 */
369 hinterval = *t;
370 }
371
0a7de745 372 if (sfb_hinterval != 0) {
316670eb 373 hinterval = sfb_hinterval;
0a7de745 374 } else if (t == NULL || hinterval == 0) {
316670eb 375 hinterval = ((u_int64_t)SFB_HINTERVAL(sp) * NSEC_PER_SEC);
0a7de745 376 }
316670eb
A
377
378 net_nsectimer(&hinterval, &sp->sfb_hinterval);
379
380 nanouptime(&now);
381 net_timeradd(&now, &sp->sfb_hinterval, &sp->sfb_nextreset);
382}
383
fe8ab488
A
384static void
385sfb_calc_update_interval(struct sfb *sp, u_int64_t out_bw)
386{
387#pragma unused(out_bw)
388 u_int64_t update_interval = 0;
39037602 389 ifclassq_calc_update_interval(&update_interval);
fe8ab488
A
390 net_nsectimer(&update_interval, &sp->sfb_update_interval);
391}
392
316670eb
A
393/*
394 * sfb support routines
395 */
396struct sfb *
397sfb_alloc(struct ifnet *ifp, u_int32_t qid, u_int32_t qlim, u_int32_t flags)
398{
399 struct sfb *sp;
39236c6e 400 int i;
316670eb
A
401
402 VERIFY(ifp != NULL && qlim > 0);
403
f427ee49
A
404 sp = zalloc_flags(sfb_zone, Z_WAITOK | Z_ZERO);
405 sp->sfb_bins = zalloc_flags(sfb_bins_zone, Z_WAITOK | Z_ZERO);
406 sp->sfb_fc_lists = zalloc_flags(sfb_fcl_zone, Z_WAITOK | Z_ZERO);
39236c6e 407
0a7de745 408 for (i = 0; i < SFB_BINS; ++i) {
39236c6e 409 STAILQ_INIT(&SFB_FC_LIST(sp, i)->fclist);
0a7de745 410 }
316670eb 411
316670eb
A
412 sp->sfb_ifp = ifp;
413 sp->sfb_qlim = qlim;
414 sp->sfb_qid = qid;
39236c6e
A
415 sp->sfb_flags = (flags & SFBF_USERFLAGS);
416#if !PF_ECN
417 if (sp->sfb_flags & SFBF_ECN) {
418 sp->sfb_flags &= ~SFBF_ECN;
419 log(LOG_ERR, "%s: SFB qid=%d, ECN not available; ignoring "
420 "SFBF_ECN flag!\n", if_name(ifp), sp->sfb_qid);
421 }
422#endif /* !PF_ECN */
316670eb 423
5ba3f43e 424 sfb_resetq(sp, CLASSQ_EV_INIT);
316670eb 425
0a7de745 426 return sp;
316670eb
A
427}
428
429static void
39236c6e 430sfb_fclist_append(struct sfb *sp, struct sfb_fcl *fcl)
316670eb
A
431{
432 IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd);
39236c6e
A
433 VERIFY(STAILQ_EMPTY(&fcl->fclist) || fcl->cnt > 0);
434 sp->sfb_stats.flow_feedback += fcl->cnt;
435 fcl->cnt = 0;
436
437 flowadv_add(&fcl->fclist);
438 VERIFY(fcl->cnt == 0 && STAILQ_EMPTY(&fcl->fclist));
316670eb
A
439}
440
441static void
442sfb_fclists_clean(struct sfb *sp)
443{
444 int i;
445
39236c6e 446 /* Move all the flow control entries to the flowadv list */
316670eb 447 for (i = 0; i < SFB_BINS; ++i) {
39236c6e 448 struct sfb_fcl *fcl = SFB_FC_LIST(sp, i);
0a7de745 449 if (!STAILQ_EMPTY(&fcl->fclist)) {
316670eb 450 sfb_fclist_append(sp, fcl);
0a7de745 451 }
316670eb
A
452 }
453}
454
455void
456sfb_destroy(struct sfb *sp)
457{
458 sfb_fclists_clean(sp);
459 if (sp->sfb_bins != NULL) {
39236c6e 460 zfree(sfb_bins_zone, sp->sfb_bins);
316670eb
A
461 sp->sfb_bins = NULL;
462 }
463 if (sp->sfb_fc_lists != NULL) {
39236c6e 464 zfree(sfb_fcl_zone, sp->sfb_fc_lists);
316670eb
A
465 sp->sfb_fc_lists = NULL;
466 }
467 zfree(sfb_zone, sp);
468}
469
470static void
471sfb_resetq(struct sfb *sp, cqev_t ev)
472{
473 struct ifnet *ifp = sp->sfb_ifp;
474 u_int64_t eff_rate;
475
476 VERIFY(ifp != NULL);
477
478 if (ev != CLASSQ_EV_LINK_DOWN) {
479 (*sp->sfb_bins)[0].fudge = sfb_random(sp);
480 (*sp->sfb_bins)[1].fudge = sfb_random(sp);
f427ee49
A
481 sp->sfb_allocation = sfb_allocation == 0 ?
482 (uint16_t)(sp->sfb_qlim / 3) :
483 (uint16_t)sfb_allocation;
316670eb
A
484 sp->sfb_drop_thresh = sp->sfb_allocation +
485 (sp->sfb_allocation >> 1);
486 }
487
488 sp->sfb_clearpkts = 0;
489 sp->sfb_current = 0;
490
491 eff_rate = ifnet_output_linkrate(ifp);
492 sp->sfb_eff_rate = eff_rate;
493
494 sfb_calc_holdtime(sp, eff_rate);
495 sfb_calc_pboxtime(sp, eff_rate);
496 sfb_calc_hinterval(sp, NULL);
39037602 497 ifclassq_calc_target_qdelay(ifp, &sp->sfb_target_qdelay);
fe8ab488 498 sfb_calc_update_interval(sp, eff_rate);
316670eb
A
499
500 if (ev == CLASSQ_EV_LINK_DOWN ||
0a7de745 501 ev == CLASSQ_EV_LINK_UP) {
316670eb 502 sfb_fclists_clean(sp);
0a7de745 503 }
316670eb 504
0a7de745
A
505 bzero(sp->sfb_bins, sizeof(*sp->sfb_bins));
506 bzero(&sp->sfb_stats, sizeof(sp->sfb_stats));
316670eb 507
0a7de745 508 if (ev == CLASSQ_EV_LINK_DOWN || !classq_verbose) {
316670eb 509 return;
0a7de745 510 }
316670eb
A
511
512 log(LOG_DEBUG, "%s: SFB qid=%d, holdtime=%llu nsec, "
513 "pboxtime=%llu nsec, allocation=%d, drop_thresh=%d, "
fe8ab488
A
514 "hinterval=%d sec, sfb_bins=%d bytes, eff_rate=%llu bps"
515 "target_qdelay= %llu nsec "
516 "update_interval=%llu sec %llu nsec flags=0x%x\n",
316670eb
A
517 if_name(ifp), sp->sfb_qid, (u_int64_t)sp->sfb_holdtime.tv_nsec,
518 (u_int64_t)sp->sfb_pboxtime.tv_nsec,
519 (u_int32_t)sp->sfb_allocation, (u_int32_t)sp->sfb_drop_thresh,
0a7de745 520 (int)sp->sfb_hinterval.tv_sec, (int)sizeof(*sp->sfb_bins),
fe8ab488
A
521 eff_rate, (u_int64_t)sp->sfb_target_qdelay,
522 (u_int64_t)sp->sfb_update_interval.tv_sec,
523 (u_int64_t)sp->sfb_update_interval.tv_nsec, sp->sfb_flags);
316670eb
A
524}
525
526void
527sfb_getstats(struct sfb *sp, struct sfb_stats *sps)
528{
529 sps->allocation = sp->sfb_allocation;
530 sps->dropthresh = sp->sfb_drop_thresh;
531 sps->clearpkts = sp->sfb_clearpkts;
532 sps->current = sp->sfb_current;
fe8ab488
A
533 sps->target_qdelay = sp->sfb_target_qdelay;
534 sps->min_estdelay = sp->sfb_min_qdelay;
f427ee49 535 sps->delay_fcthreshold = (uint32_t)sp->sfb_fc_threshold;
fe8ab488 536 sps->flags = sp->sfb_flags;
316670eb
A
537
538 net_timernsec(&sp->sfb_holdtime, &sp->sfb_stats.hold_time);
539 net_timernsec(&sp->sfb_pboxtime, &sp->sfb_stats.pbox_time);
540 net_timernsec(&sp->sfb_hinterval, &sp->sfb_stats.rehash_intval);
fe8ab488 541 net_timernsec(&sp->sfb_update_interval, &sps->update_interval);
316670eb
A
542 *(&(sps->sfbstats)) = *(&(sp->sfb_stats));
543
0a7de745
A
544 _CASSERT(sizeof((*sp->sfb_bins)[0].stats) ==
545 sizeof(sps->binstats[0].stats));
316670eb
A
546
547 bcopy(&(*sp->sfb_bins)[0].stats, &sps->binstats[0].stats,
0a7de745 548 sizeof(sps->binstats[0].stats));
316670eb 549 bcopy(&(*sp->sfb_bins)[1].stats, &sps->binstats[1].stats,
0a7de745 550 sizeof(sps->binstats[1].stats));
316670eb
A
551}
552
553static void
554sfb_swap_bins(struct sfb *sp, u_int32_t len)
555{
556 int i, j, s;
557
0a7de745 558 if (sp->sfb_flags & SFBF_SUSPENDED) {
316670eb 559 return;
0a7de745 560 }
316670eb
A
561
562 s = sp->sfb_current;
563 VERIFY((s + (s ^ 1)) == 1);
564
565 (*sp->sfb_bins)[s].fudge = sfb_random(sp); /* recompute perturbation */
566 sp->sfb_clearpkts = len;
567 sp->sfb_stats.num_rehash++;
568
0a7de745 569 s = (sp->sfb_current ^= 1); /* flip the bit (swap current) */
316670eb
A
570
571 if (classq_verbose) {
572 log(LOG_DEBUG, "%s: SFB qid=%d, set %d is now current, "
573 "qlen=%d\n", if_name(sp->sfb_ifp), sp->sfb_qid, s, len);
574 }
575
576 /* clear freezetime for all current bins */
577 bzero(&(*sp->sfb_bins)[s].freezetime,
0a7de745 578 sizeof((*sp->sfb_bins)[s].freezetime));
316670eb
A
579
580 /* clear/adjust bin statistics and flow control lists */
581 for (i = 0; i < SFB_BINS; i++) {
39236c6e 582 struct sfb_fcl *fcl = SFB_FC_LIST(sp, i);
316670eb 583
0a7de745 584 if (!STAILQ_EMPTY(&fcl->fclist)) {
316670eb 585 sfb_fclist_append(sp, fcl);
0a7de745 586 }
316670eb
A
587
588 for (j = 0; j < SFB_LEVELS; j++) {
589 struct sfbbinstats *cbin, *wbin;
590
0a7de745
A
591 cbin = SFB_BINST(sp, j, i, s); /* current */
592 wbin = SFB_BINST(sp, j, i, s ^ 1); /* warm-up */
316670eb
A
593
594 cbin->pkts = 0;
fe8ab488 595 cbin->bytes = 0;
0a7de745 596 if (cbin->pmark > SFB_MAX_PMARK) {
316670eb 597 cbin->pmark = SFB_MAX_PMARK;
0a7de745
A
598 }
599 if (cbin->pmark < 0) {
316670eb 600 cbin->pmark = 0;
0a7de745 601 }
316670eb
A
602
603 /*
604 * Keep pmark from before to identify
605 * non-responsives immediately.
606 */
0a7de745 607 if (wbin->pmark > SFB_PMARK_WARM) {
316670eb 608 wbin->pmark = SFB_PMARK_WARM;
0a7de745 609 }
316670eb
A
610 }
611 }
612}
613
614static inline int
5ba3f43e 615sfb_pcheck(struct sfb *sp, uint32_t pkt_sfb_hash)
316670eb
A
616{
617#if SFB_LEVELS != 2
618 int i, n;
619#endif /* SFB_LEVELS != 2 */
5ba3f43e 620 uint8_t *pkt_sfb_hash8 = (uint8_t *)&pkt_sfb_hash;
316670eb
A
621 int s;
622
623 s = sp->sfb_current;
624 VERIFY((s + (s ^ 1)) == 1);
625
626 /*
627 * For current bins, returns 1 if all pmark >= SFB_PMARK_TH,
628 * 0 otherwise; optimize for SFB_LEVELS=2.
629 */
630#if SFB_LEVELS == 2
631 /*
632 * Level 0: bin index at [0] for set 0; [2] for set 1
633 * Level 1: bin index at [1] for set 0; [3] for set 1
634 */
5ba3f43e 635 if (SFB_BINST(sp, 0, SFB_BINMASK(pkt_sfb_hash8[(s << 1)]),
316670eb 636 s)->pmark < SFB_PMARK_TH ||
5ba3f43e 637 SFB_BINST(sp, 1, SFB_BINMASK(pkt_sfb_hash8[(s << 1) + 1]),
0a7de745
A
638 s)->pmark < SFB_PMARK_TH) {
639 return 0;
640 }
316670eb
A
641#else /* SFB_LEVELS != 2 */
642 for (i = 0; i < SFB_LEVELS; i++) {
0a7de745 643 if (s == 0) { /* set 0, bin index [0,1] */
5ba3f43e 644 n = SFB_BINMASK(pkt_sfb_hash8[i]);
0a7de745 645 } else { /* set 1, bin index [2,3] */
5ba3f43e 646 n = SFB_BINMASK(pkt_sfb_hash8[i + 2]);
0a7de745 647 }
316670eb 648
0a7de745
A
649 if (SFB_BINST(sp, i, n, s)->pmark < SFB_PMARK_TH) {
650 return 0;
651 }
316670eb
A
652 }
653#endif /* SFB_LEVELS != 2 */
0a7de745 654 return 1;
316670eb
A
655}
656
657static int
5ba3f43e
A
658sfb_penalize(struct sfb *sp, uint32_t pkt_sfb_hash, uint32_t *pkt_sfb_flags,
659 struct timespec *now)
316670eb 660{
cb323159 661 struct timespec delta = { .tv_sec = 0, .tv_nsec = 0 };
5ba3f43e 662 uint8_t *pkt_sfb_hash8 = (uint8_t *)&pkt_sfb_hash;
316670eb
A
663
664 /* If minimum pmark of current bins is < SFB_PMARK_TH, we're done */
0a7de745
A
665 if (!sfb_ratelimit || !sfb_pcheck(sp, pkt_sfb_hash)) {
666 return 0;
667 }
316670eb
A
668
669 net_timersub(now, &sp->sfb_pboxfreeze, &delta);
670 if (net_timercmp(&delta, &sp->sfb_pboxtime, <)) {
671#if SFB_LEVELS != 2
672 int i;
673#endif /* SFB_LEVELS != 2 */
674 struct sfbbinstats *bin;
675 int n, w;
676
677 w = sp->sfb_current ^ 1;
678 VERIFY((w + (w ^ 1)) == 1);
679
680 /*
681 * Update warm-up bins; optimize for SFB_LEVELS=2
682 */
683#if SFB_LEVELS == 2
684 /* Level 0: bin index at [0] for set 0; [2] for set 1 */
5ba3f43e 685 n = SFB_BINMASK(pkt_sfb_hash8[(w << 1)]);
316670eb 686 bin = SFB_BINST(sp, 0, n, w);
0a7de745 687 if (bin->pkts >= sp->sfb_allocation) {
316670eb 688 sfb_increment_bin(sp, bin, SFB_BINFT(sp, 0, n, w), now);
0a7de745 689 }
316670eb
A
690
691 /* Level 0: bin index at [1] for set 0; [3] for set 1 */
5ba3f43e 692 n = SFB_BINMASK(pkt_sfb_hash8[(w << 1) + 1]);
316670eb 693 bin = SFB_BINST(sp, 1, n, w);
0a7de745 694 if (bin->pkts >= sp->sfb_allocation) {
316670eb 695 sfb_increment_bin(sp, bin, SFB_BINFT(sp, 1, n, w), now);
0a7de745 696 }
316670eb
A
697#else /* SFB_LEVELS != 2 */
698 for (i = 0; i < SFB_LEVELS; i++) {
0a7de745 699 if (w == 0) { /* set 0, bin index [0,1] */
5ba3f43e 700 n = SFB_BINMASK(pkt_sfb_hash8[i]);
0a7de745 701 } else { /* set 1, bin index [2,3] */
5ba3f43e 702 n = SFB_BINMASK(pkt_sfb_hash8[i + 2]);
0a7de745 703 }
316670eb
A
704
705 bin = SFB_BINST(sp, i, n, w);
706 if (bin->pkts >= sp->sfb_allocation) {
707 sfb_increment_bin(sp, bin,
708 SFB_BINFT(sp, i, n, w), now);
709 }
710 }
711#endif /* SFB_LEVELS != 2 */
0a7de745 712 return 1;
316670eb
A
713 }
714
715 /* non-conformant or else misclassified flow; queue it anyway */
5ba3f43e 716 *pkt_sfb_flags |= SFB_PKT_PBOX;
316670eb
A
717 *(&sp->sfb_pboxfreeze) = *now;
718
0a7de745 719 return 0;
316670eb
A
720}
721
722static void
723sfb_adjust_bin(struct sfb *sp, struct sfbbinstats *bin, struct timespec *ft,
724 struct timespec *now, boolean_t inc)
725{
726 struct timespec delta;
727
728 net_timersub(now, ft, &delta);
729 if (net_timercmp(&delta, &sp->sfb_holdtime, <)) {
730 if (classq_verbose > 1) {
731 log(LOG_DEBUG, "%s: SFB qid=%d, %s update frozen "
732 "(delta=%llu nsec)\n", if_name(sp->sfb_ifp),
733 sp->sfb_qid, inc ? "increment" : "decrement",
734 (u_int64_t)delta.tv_nsec);
735 }
736 return;
737 }
738
739 /* increment/decrement marking probability */
740 *ft = *now;
0a7de745 741 if (inc) {
316670eb 742 SFB_PMARK_INC(bin);
0a7de745 743 } else {
316670eb 744 SFB_PMARK_DEC(bin);
0a7de745 745 }
316670eb
A
746}
747
748static void
749sfb_decrement_bin(struct sfb *sp, struct sfbbinstats *bin, struct timespec *ft,
750 struct timespec *now)
751{
0a7de745 752 return sfb_adjust_bin(sp, bin, ft, now, FALSE);
316670eb
A
753}
754
755static void
756sfb_increment_bin(struct sfb *sp, struct sfbbinstats *bin, struct timespec *ft,
757 struct timespec *now)
758{
0a7de745 759 return sfb_adjust_bin(sp, bin, ft, now, TRUE);
316670eb
A
760}
761
762static inline void
5ba3f43e 763sfb_dq_update_bins(struct sfb *sp, uint32_t pkt_sfb_hash, uint32_t pkt_len,
f427ee49 764 struct timespec *now, u_int64_t qsize)
316670eb
A
765{
766#if SFB_LEVELS != 2 || SFB_FC_LEVEL != 0
767 int i;
768#endif /* SFB_LEVELS != 2 || SFB_FC_LEVEL != 0 */
769 struct sfbbinstats *bin;
770 int s, n;
39236c6e 771 struct sfb_fcl *fcl = NULL;
5ba3f43e 772 uint8_t *pkt_sfb_hash8 = (uint8_t *)&pkt_sfb_hash;
316670eb
A
773
774 s = sp->sfb_current;
775 VERIFY((s + (s ^ 1)) == 1);
776
777 /*
778 * Update current bins; optimize for SFB_LEVELS=2 and SFB_FC_LEVEL=0
779 */
780#if SFB_LEVELS == 2 && SFB_FC_LEVEL == 0
781 /* Level 0: bin index at [0] for set 0; [2] for set 1 */
5ba3f43e 782 n = SFB_BINMASK(pkt_sfb_hash8[(s << 1)]);
316670eb
A
783 bin = SFB_BINST(sp, 0, n, s);
784
5ba3f43e 785 VERIFY(bin->pkts > 0 && bin->bytes >= pkt_len);
fe8ab488 786 bin->pkts--;
5ba3f43e 787 bin->bytes -= pkt_len;
fe8ab488 788
0a7de745 789 if (bin->pkts == 0) {
316670eb 790 sfb_decrement_bin(sp, bin, SFB_BINFT(sp, 0, n, s), now);
0a7de745 791 }
fe8ab488
A
792
793 /* Deliver flow control feedback to the sockets */
794 if (SFB_QUEUE_DELAYBASED(sp)) {
795 if (!(SFB_IS_DELAYHIGH(sp)) ||
796 bin->bytes <= sp->sfb_fc_threshold ||
0a7de745 797 bin->pkts == 0 || qsize == 0) {
fe8ab488 798 fcl = SFB_FC_LIST(sp, n);
0a7de745 799 }
fe8ab488 800 } else if (bin->pkts <= (sp->sfb_allocation >> 2)) {
0a7de745 801 fcl = SFB_FC_LIST(sp, n);
316670eb 802 }
fe8ab488 803
0a7de745 804 if (fcl != NULL && !STAILQ_EMPTY(&fcl->fclist)) {
fe8ab488 805 sfb_fclist_append(sp, fcl);
0a7de745 806 }
fe8ab488 807 fcl = NULL;
316670eb
A
808
809 /* Level 1: bin index at [1] for set 0; [3] for set 1 */
5ba3f43e 810 n = SFB_BINMASK(pkt_sfb_hash8[(s << 1) + 1]);
316670eb
A
811 bin = SFB_BINST(sp, 1, n, s);
812
5ba3f43e 813 VERIFY(bin->pkts > 0 && bin->bytes >= (u_int64_t)pkt_len);
fe8ab488 814 bin->pkts--;
5ba3f43e 815 bin->bytes -= pkt_len;
0a7de745 816 if (bin->pkts == 0) {
316670eb 817 sfb_decrement_bin(sp, bin, SFB_BINFT(sp, 1, n, s), now);
0a7de745 818 }
316670eb
A
819#else /* SFB_LEVELS != 2 || SFB_FC_LEVEL != 0 */
820 for (i = 0; i < SFB_LEVELS; i++) {
0a7de745 821 if (s == 0) { /* set 0, bin index [0,1] */
5ba3f43e 822 n = SFB_BINMASK(pkt_sfb_hash8[i]);
0a7de745 823 } else { /* set 1, bin index [2,3] */
5ba3f43e 824 n = SFB_BINMASK(pkt_sfb_hash8[i + 2]);
0a7de745 825 }
316670eb
A
826
827 bin = SFB_BINST(sp, i, n, s);
828
5ba3f43e 829 VERIFY(bin->pkts > 0 && bin->bytes >= pkt_len);
fe8ab488 830 bin->pkts--;
5ba3f43e 831 bin->bytes -= pkt_len;
0a7de745 832 if (bin->pkts == 0) {
316670eb
A
833 sfb_decrement_bin(sp, bin,
834 SFB_BINFT(sp, i, n, s), now);
0a7de745
A
835 }
836 if (i != SFB_FC_LEVEL) {
fe8ab488 837 continue;
0a7de745 838 }
fe8ab488
A
839 if (SFB_QUEUE_DELAYBASED(sp)) {
840 if (!(SFB_IS_DELAYHIGH(sp)) ||
0a7de745 841 bin->bytes <= sp->sfb_fc_threshold) {
316670eb 842 fcl = SFB_FC_LIST(sp, n);
0a7de745 843 }
fe8ab488
A
844 } else if (bin->pkts <= (sp->sfb_allocation >> 2)) {
845 fcl = SFB_FC_LIST(sp, n);
316670eb 846 }
0a7de745 847 if (fcl != NULL && !STAILQ_EMPTY(&fcl->fclist)) {
fe8ab488 848 sfb_fclist_append(sp, fcl);
0a7de745 849 }
fe8ab488 850 fcl = NULL;
316670eb
A
851 }
852#endif /* SFB_LEVELS != 2 || SFB_FC_LEVEL != 0 */
853}
854
855static inline void
5ba3f43e 856sfb_eq_update_bins(struct sfb *sp, uint32_t pkt_sfb_hash, uint32_t pkt_len)
316670eb
A
857{
858#if SFB_LEVELS != 2
859 int i, n;
860#endif /* SFB_LEVELS != 2 */
861 int s;
fe8ab488 862 struct sfbbinstats *bin;
5ba3f43e 863 uint8_t *pkt_sfb_hash8 = (uint8_t *)&pkt_sfb_hash;
316670eb
A
864 s = sp->sfb_current;
865 VERIFY((s + (s ^ 1)) == 1);
866
867 /*
868 * Update current bins; optimize for SFB_LEVELS=2
869 */
870#if SFB_LEVELS == 2
871 /* Level 0: bin index at [0] for set 0; [2] for set 1 */
fe8ab488 872 bin = SFB_BINST(sp, 0,
5ba3f43e 873 SFB_BINMASK(pkt_sfb_hash8[(s << 1)]), s);
fe8ab488 874 bin->pkts++;
5ba3f43e 875 bin->bytes += pkt_len;
316670eb
A
876
877 /* Level 1: bin index at [1] for set 0; [3] for set 1 */
fe8ab488 878 bin = SFB_BINST(sp, 1,
5ba3f43e 879 SFB_BINMASK(pkt_sfb_hash8[(s << 1) + 1]), s);
fe8ab488 880 bin->pkts++;
5ba3f43e 881 bin->bytes += pkt_len;
fe8ab488 882
316670eb
A
883#else /* SFB_LEVELS != 2 */
884 for (i = 0; i < SFB_LEVELS; i++) {
0a7de745 885 if (s == 0) { /* set 0, bin index [0,1] */
5ba3f43e 886 n = SFB_BINMASK(pkt_sfb_hash8[i]);
0a7de745 887 } else { /* set 1, bin index [2,3] */
5ba3f43e 888 n = SFB_BINMASK(pkt_sfb_hash8[i + 2]);
0a7de745 889 }
316670eb 890
fe8ab488
A
891 bin = SFB_BINST(sp, i, n, s);
892 bin->pkts++;
5ba3f43e 893 bin->bytes += pkt_len;
316670eb
A
894 }
895#endif /* SFB_LEVELS != 2 */
896}
897
898static boolean_t
5ba3f43e
A
899sfb_bin_addfcentry(struct sfb *sp, pktsched_pkt_t *pkt, uint32_t pkt_sfb_hash,
900 uint8_t flowsrc, uint32_t flowid)
316670eb 901{
39236c6e 902 struct flowadv_fcentry *fce;
39236c6e 903 struct sfb_fcl *fcl;
316670eb 904 int s;
5ba3f43e 905 uint8_t *pkt_sfb_hash8 = (uint8_t *)&pkt_sfb_hash;
316670eb
A
906
907 s = sp->sfb_current;
908 VERIFY((s + (s ^ 1)) == 1);
909
39236c6e
A
910 if (flowid == 0) {
911 sp->sfb_stats.null_flowid++;
0a7de745 912 return FALSE;
316670eb
A
913 }
914
915 /*
916 * Use value at index 0 for set 0 and
917 * value at index 2 for set 1
918 */
5ba3f43e 919 fcl = SFB_FC_LIST(sp, SFB_BINMASK(pkt_sfb_hash8[(s << 1)]));
39236c6e 920 STAILQ_FOREACH(fce, &fcl->fclist, fce_link) {
5ba3f43e 921 if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
39236c6e 922 fce->fce_flowid == flowid) {
316670eb 923 /* Already on flow control list; just return */
0a7de745 924 return TRUE;
316670eb
A
925 }
926 }
927
928 IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd);
5ba3f43e 929 fce = pktsched_alloc_fcentry(pkt, sp->sfb_ifp, M_WAITOK);
316670eb 930 if (fce != NULL) {
39236c6e
A
931 STAILQ_INSERT_TAIL(&fcl->fclist, fce, fce_link);
932 fcl->cnt++;
316670eb
A
933 sp->sfb_stats.flow_controlled++;
934 }
935
0a7de745 936 return fce != NULL;
316670eb
A
937}
938
fe8ab488
A
939/*
940 * check if this flow needs to be flow-controlled or if this
941 * packet needs to be dropped.
942 */
943static int
944sfb_bin_mark_or_drop(struct sfb *sp, struct sfbbinstats *bin)
945{
946 int ret = 0;
947 if (SFB_QUEUE_DELAYBASED(sp)) {
948 /*
949 * Mark or drop if this bin has more
950 * bytes than the flowcontrol threshold.
951 */
952 if (SFB_IS_DELAYHIGH(sp) &&
0a7de745 953 bin->bytes >= (sp->sfb_fc_threshold << 1)) {
fe8ab488 954 ret = 1;
0a7de745 955 }
fe8ab488
A
956 } else {
957 if (bin->pkts >= sp->sfb_allocation &&
0a7de745
A
958 bin->pkts >= sp->sfb_drop_thresh) {
959 ret = 1; /* drop or mark */
960 }
fe8ab488 961 }
0a7de745 962 return ret;
fe8ab488
A
963}
964
316670eb
A
965/*
966 * early-drop probability is kept in pmark of each bin of the flow
967 */
968static int
5ba3f43e 969sfb_drop_early(struct sfb *sp, uint32_t pkt_sfb_hash, u_int16_t *pmin,
316670eb
A
970 struct timespec *now)
971{
972#if SFB_LEVELS != 2
973 int i;
974#endif /* SFB_LEVELS != 2 */
975 struct sfbbinstats *bin;
976 int s, n, ret = 0;
5ba3f43e 977 uint8_t *pkt_sfb_hash8 = (uint8_t *)&pkt_sfb_hash;
316670eb
A
978
979 s = sp->sfb_current;
980 VERIFY((s + (s ^ 1)) == 1);
981
982 *pmin = (u_int16_t)-1;
983
984 /*
985 * Update current bins; optimize for SFB_LEVELS=2
986 */
987#if SFB_LEVELS == 2
988 /* Level 0: bin index at [0] for set 0; [2] for set 1 */
5ba3f43e 989 n = SFB_BINMASK(pkt_sfb_hash8[(s << 1)]);
316670eb 990 bin = SFB_BINST(sp, 0, n, s);
0a7de745 991 if (*pmin > (u_int16_t)bin->pmark) {
316670eb 992 *pmin = (u_int16_t)bin->pmark;
0a7de745 993 }
316670eb 994
fe8ab488
A
995
996 /* Update SFB probability */
0a7de745 997 if (bin->pkts >= sp->sfb_allocation) {
316670eb 998 sfb_increment_bin(sp, bin, SFB_BINFT(sp, 0, n, s), now);
0a7de745 999 }
fe8ab488
A
1000
1001 ret = sfb_bin_mark_or_drop(sp, bin);
316670eb
A
1002
1003 /* Level 1: bin index at [1] for set 0; [3] for set 1 */
5ba3f43e 1004 n = SFB_BINMASK(pkt_sfb_hash8[(s << 1) + 1]);
316670eb 1005 bin = SFB_BINST(sp, 1, n, s);
0a7de745 1006 if (*pmin > (u_int16_t)bin->pmark) {
316670eb 1007 *pmin = (u_int16_t)bin->pmark;
0a7de745 1008 }
316670eb 1009
0a7de745 1010 if (bin->pkts >= sp->sfb_allocation) {
316670eb 1011 sfb_increment_bin(sp, bin, SFB_BINFT(sp, 1, n, s), now);
0a7de745 1012 }
316670eb
A
1013#else /* SFB_LEVELS != 2 */
1014 for (i = 0; i < SFB_LEVELS; i++) {
0a7de745 1015 if (s == 0) { /* set 0, bin index [0,1] */
5ba3f43e 1016 n = SFB_BINMASK(pkt_sfb_hash8[i]);
0a7de745 1017 } else { /* set 1, bin index [2,3] */
5ba3f43e 1018 n = SFB_BINMASK(pkt_sfb_hash8[i + 2]);
0a7de745 1019 }
316670eb
A
1020
1021 bin = SFB_BINST(sp, i, n, s);
0a7de745 1022 if (*pmin > (u_int16_t)bin->pmark) {
316670eb 1023 *pmin = (u_int16_t)bin->pmark;
0a7de745 1024 }
316670eb 1025
0a7de745 1026 if (bin->pkts >= sp->sfb_allocation) {
316670eb
A
1027 sfb_increment_bin(sp, bin,
1028 SFB_BINFT(sp, i, n, s), now);
0a7de745
A
1029 }
1030 if (i == SFB_FC_LEVEL) {
fe8ab488 1031 ret = sfb_bin_mark_or_drop(sp, bin);
0a7de745 1032 }
316670eb
A
1033 }
1034#endif /* SFB_LEVELS != 2 */
1035
0a7de745
A
1036 if (sp->sfb_flags & SFBF_SUSPENDED) {
1037 ret = 1; /* drop or mark */
1038 }
1039 return ret;
316670eb
A
1040}
1041
fe8ab488
A
1042void
1043sfb_detect_dequeue_stall(struct sfb *sp, class_queue_t *q,
1044 struct timespec *now)
1045{
1046 struct timespec max_getqtime;
1047
1048 if (!SFB_QUEUE_DELAYBASED(sp) || SFB_IS_DELAYHIGH(sp) ||
1049 qsize(q) <= SFB_MIN_FC_THRESHOLD_BYTES ||
0a7de745 1050 !net_timerisset(&sp->sfb_getqtime)) {
fe8ab488 1051 return;
0a7de745 1052 }
fe8ab488
A
1053
1054 net_timeradd(&sp->sfb_getqtime, &sp->sfb_update_interval,
1055 &max_getqtime);
1056 if (net_timercmp(now, &max_getqtime, >)) {
1057 /*
1058 * No packets have been dequeued in an update interval
1059 * worth of time. It means that the queue is stalled
1060 */
1061 SFB_SET_DELAY_HIGH(sp, q);
1062 sp->sfb_stats.dequeue_stall++;
1063 }
1064}
1065
0a7de745
A
1066#define DTYPE_NODROP 0 /* no drop */
1067#define DTYPE_FORCED 1 /* a "forced" drop */
1068#define DTYPE_EARLY 2 /* an "unforced" (early) drop */
316670eb
A
1069
1070int
5ba3f43e
A
1071sfb_addq(struct sfb *sp, class_queue_t *q, pktsched_pkt_t *pkt,
1072 struct pf_mtag *t)
316670eb 1073{
39236c6e
A
1074#if !PF_ECN
1075#pragma unused(t)
1076#endif /* !PF_ECN */
316670eb
A
1077 struct timespec now;
1078 int droptype, s;
5ba3f43e 1079 uint16_t pmin;
316670eb
A
1080 int fc_adv = 0;
1081 int ret = CLASSQEQ_SUCCESS;
5ba3f43e
A
1082 uint32_t maxqsize = 0;
1083 uint64_t *pkt_timestamp;
1084 uint32_t *pkt_sfb_hash;
1085 uint16_t *pkt_sfb_hash16;
1086 uint32_t *pkt_sfb_flags;
1087 uint32_t pkt_flowid;
cb323159 1088 volatile uint32_t *pkt_flags;
5ba3f43e 1089 uint8_t pkt_proto, pkt_flowsrc;
316670eb
A
1090
1091 s = sp->sfb_current;
1092 VERIFY((s + (s ^ 1)) == 1);
1093
5ba3f43e
A
1094 pktsched_get_pkt_vars(pkt, &pkt_flags, &pkt_timestamp, &pkt_flowid,
1095 &pkt_flowsrc, &pkt_proto, NULL);
1096 pkt_sfb_hash = pktsched_get_pkt_sfb_vars(pkt, &pkt_sfb_flags);
1097 pkt_sfb_hash16 = (uint16_t *)pkt_sfb_hash;
fe8ab488 1098
cb323159
A
1099 switch (pkt->pktsched_ptype) {
1100 case QP_MBUF:
5ba3f43e
A
1101 /* See comments in <rdar://problem/14040693> */
1102 VERIFY(!(*pkt_flags & PKTF_PRIV_GUARDED));
1103 *pkt_flags |= PKTF_PRIV_GUARDED;
cb323159
A
1104 break;
1105 default:
1106 VERIFY(0);
1107 /* NOTREACHED */
1108 __builtin_unreachable();
5ba3f43e
A
1109 }
1110
1111 if (*pkt_timestamp > 0) {
1112 net_nsectimer(pkt_timestamp, &now);
3e170ce0
A
1113 } else {
1114 nanouptime(&now);
5ba3f43e 1115 net_timernsec(&now, pkt_timestamp);
3e170ce0
A
1116 }
1117
316670eb
A
1118 /* time to swap the bins? */
1119 if (net_timercmp(&now, &sp->sfb_nextreset, >=)) {
1120 net_timeradd(&now, &sp->sfb_hinterval, &sp->sfb_nextreset);
1121 sfb_swap_bins(sp, qlen(q));
1122 s = sp->sfb_current;
1123 VERIFY((s + (s ^ 1)) == 1);
1124 }
1125
fe8ab488
A
1126 if (!net_timerisset(&sp->sfb_update_time)) {
1127 net_timeradd(&now, &sp->sfb_update_interval,
1128 &sp->sfb_update_time);
1129 }
1130
3e170ce0
A
1131 /*
1132 * If getq time is not set because this is the first packet
1133 * or after idle time, set it now so that we can detect a stall.
1134 */
0a7de745 1135 if (qsize(q) == 0 && !net_timerisset(&sp->sfb_getqtime)) {
3e170ce0 1136 *(&sp->sfb_getqtime) = *(&now);
0a7de745 1137 }
3e170ce0 1138
5ba3f43e
A
1139 *pkt_sfb_flags = 0;
1140 pkt_sfb_hash16[s] =
0a7de745 1141 (SFB_HASH(&pkt_flowid, sizeof(pkt_flowid),
316670eb 1142 (*sp->sfb_bins)[s].fudge) & SFB_HASHMASK);
5ba3f43e 1143 pkt_sfb_hash16[s ^ 1] =
0a7de745 1144 (SFB_HASH(&pkt_flowid, sizeof(pkt_flowid),
316670eb
A
1145 (*sp->sfb_bins)[s ^ 1].fudge) & SFB_HASHMASK);
1146
fe8ab488
A
1147 /* check if the queue has been stalled */
1148 sfb_detect_dequeue_stall(sp, q, &now);
1149
316670eb
A
1150 /* see if we drop early */
1151 droptype = DTYPE_NODROP;
5ba3f43e 1152 if (sfb_drop_early(sp, *pkt_sfb_hash, &pmin, &now)) {
316670eb
A
1153 /* flow control, mark or drop by sfb */
1154 if ((sp->sfb_flags & SFBF_FLOWCTL) &&
5ba3f43e 1155 (*pkt_flags & PKTF_FLOW_ADV)) {
316670eb
A
1156 fc_adv = 1;
1157 /* drop all during suspension or for non-TCP */
1158 if ((sp->sfb_flags & SFBF_SUSPENDED) ||
5ba3f43e 1159 pkt_proto != IPPROTO_TCP) {
316670eb
A
1160 droptype = DTYPE_EARLY;
1161 sp->sfb_stats.drop_early++;
1162 }
39236c6e
A
1163 }
1164#if PF_ECN
5ba3f43e 1165 /* XXX: only supported for mbuf */
39236c6e 1166 else if ((sp->sfb_flags & SFBF_ECN) &&
5ba3f43e
A
1167 (pkt->pktsched_ptype == QP_MBUF) &&
1168 (pkt_proto == IPPROTO_TCP) && /* only for TCP */
316670eb
A
1169 ((sfb_random(sp) & SFB_MAX_PMARK) <= pmin) &&
1170 mark_ecn(m, t, sp->sfb_flags) &&
1171 !(sp->sfb_flags & SFBF_SUSPENDED)) {
1172 /* successfully marked; do not drop. */
1173 sp->sfb_stats.marked_packets++;
39236c6e
A
1174 }
1175#endif /* PF_ECN */
1176 else {
316670eb
A
1177 /* unforced drop by sfb */
1178 droptype = DTYPE_EARLY;
1179 sp->sfb_stats.drop_early++;
1180 }
1181 }
1182
1183 /* non-responsive flow penalty? */
5ba3f43e
A
1184 if (droptype == DTYPE_NODROP && sfb_penalize(sp, *pkt_sfb_hash,
1185 pkt_sfb_flags, &now)) {
316670eb
A
1186 droptype = DTYPE_FORCED;
1187 sp->sfb_stats.drop_pbox++;
1188 }
1189
0a7de745 1190 if (SFB_QUEUE_DELAYBASED(sp)) {
3e170ce0 1191 maxqsize = SFB_QUEUE_DELAYBASED_MAXSIZE;
0a7de745 1192 } else {
3e170ce0 1193 maxqsize = qlimit(q);
0a7de745 1194 }
fe8ab488
A
1195
1196 /*
3e170ce0
A
1197 * When the queue length hits the queue limit, make it a forced
1198 * drop
fe8ab488 1199 */
3e170ce0 1200 if (droptype == DTYPE_NODROP && qlen(q) >= maxqsize) {
5ba3f43e 1201 if (pkt_proto == IPPROTO_TCP &&
4bd07ac2 1202 qlen(q) < (maxqsize + (maxqsize >> 1)) &&
5ba3f43e 1203 ((*pkt_flags & PKTF_TCP_REXMT) ||
3e170ce0
A
1204 (sp->sfb_flags & SFBF_LAST_PKT_DROPPED))) {
1205 /*
1206 * At some level, dropping packets will make the
1207 * flows backoff and will keep memory requirements
1208 * under control. But we should not cause a tail
1209 * drop because it can take a long time for a
1210 * TCP flow to recover. We should try to drop
1211 * alternate packets instead.
1212 */
1213 sp->sfb_flags &= ~SFBF_LAST_PKT_DROPPED;
1214 } else {
1215 droptype = DTYPE_FORCED;
1216 sp->sfb_stats.drop_queue++;
1217 sp->sfb_flags |= SFBF_LAST_PKT_DROPPED;
1218 }
316670eb
A
1219 }
1220
1221 if (fc_adv == 1 && droptype != DTYPE_FORCED &&
5ba3f43e
A
1222 sfb_bin_addfcentry(sp, pkt, *pkt_sfb_hash, pkt_flowsrc,
1223 pkt_flowid)) {
316670eb
A
1224 /* deliver flow control advisory error */
1225 if (droptype == DTYPE_NODROP) {
1226 ret = CLASSQEQ_SUCCESS_FC;
1227 VERIFY(!(sp->sfb_flags & SFBF_SUSPENDED));
1228 } else if (sp->sfb_flags & SFBF_SUSPENDED) {
5ba3f43e
A
1229 /* drop due to suspension */
1230 ret = CLASSQEQ_DROP_SP;
316670eb 1231 } else {
5ba3f43e
A
1232 /* drop due to flow-control */
1233 ret = CLASSQEQ_DROP_FC;
316670eb
A
1234 }
1235 }
316670eb
A
1236 /* if successful enqueue this packet, else drop it */
1237 if (droptype == DTYPE_NODROP) {
5ba3f43e 1238 VERIFY(pkt->pktsched_ptype == qptype(q));
cb323159 1239 _addq(q, &pkt->pktsched_pkt);
316670eb
A
1240 } else {
1241 IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd);
0a7de745 1242 return (ret != CLASSQEQ_SUCCESS) ? ret : CLASSQEQ_DROP;
316670eb
A
1243 }
1244
0a7de745 1245 if (!(*pkt_sfb_flags & SFB_PKT_PBOX)) {
5ba3f43e
A
1246 sfb_eq_update_bins(sp, *pkt_sfb_hash,
1247 pktsched_get_pkt_len(pkt));
0a7de745 1248 } else {
316670eb 1249 sp->sfb_stats.pbox_packets++;
0a7de745 1250 }
316670eb
A
1251
1252 /* successfully queued */
0a7de745 1253 return ret;
316670eb
A
1254}
1255
5ba3f43e
A
1256static void *
1257sfb_getq_flow(struct sfb *sp, class_queue_t *q, u_int32_t flow, boolean_t purge,
1258 pktsched_pkt_t *pkt)
316670eb
A
1259{
1260 struct timespec now;
5ba3f43e 1261 uint64_t *pkt_timestamp;
cb323159 1262 volatile uint32_t *pkt_flags;
5ba3f43e
A
1263 uint32_t *pkt_sfb_flags;
1264 uint32_t *pkt_sfb_hash;
cb323159 1265 classq_pkt_t p = CLASSQ_PKT_INITIALIZER(p);
316670eb 1266
0a7de745
A
1267 if (!purge && (sp->sfb_flags & SFBF_SUSPENDED)) {
1268 return NULL;
1269 }
316670eb
A
1270
1271 nanouptime(&now);
1272
1273 /* flow of 0 means head of queue */
cb323159
A
1274 if (flow == 0) {
1275 _getq(q, &p);
1276 } else {
1277 _getq_flow(q, &p, flow);
1278 }
1279
1280 if (p.cp_ptype == QP_INVALID) {
0a7de745 1281 if (!purge) {
316670eb 1282 net_timerclear(&sp->sfb_getqtime);
0a7de745
A
1283 }
1284 return NULL;
316670eb
A
1285 }
1286
cb323159 1287 pktsched_pkt_encap(pkt, &p);
5ba3f43e
A
1288 pktsched_get_pkt_vars(pkt, &pkt_flags, &pkt_timestamp, NULL,
1289 NULL, NULL, NULL);
1290 pkt_sfb_hash = pktsched_get_pkt_sfb_vars(pkt, &pkt_sfb_flags);
316670eb 1291
5ba3f43e 1292 /* See comments in <rdar://problem/14040693> */
cb323159
A
1293 switch (p.cp_ptype) {
1294 case QP_MBUF:
5ba3f43e 1295 VERIFY(*pkt_flags & PKTF_PRIV_GUARDED);
cb323159
A
1296 break;
1297 default:
1298 VERIFY(0);
1299 /* NOTREACHED */
1300 __builtin_unreachable();
0a7de745 1301 }
316670eb
A
1302
1303 if (!purge) {
1304 /* calculate EWMA of dequeues */
1305 if (net_timerisset(&sp->sfb_getqtime)) {
1306 struct timespec delta;
1307 u_int64_t avg, new;
316670eb
A
1308 net_timersub(&now, &sp->sfb_getqtime, &delta);
1309 net_timernsec(&delta, &new);
1310 avg = sp->sfb_stats.dequeue_avg;
1311 if (avg > 0) {
1312 int decay = DEQUEUE_DECAY;
1313 /*
1314 * If the time since last dequeue is
1315 * significantly greater than the current
fe8ab488 1316 * average, weigh the average more against
316670eb
A
1317 * the old value.
1318 */
0a7de745 1319 if (DEQUEUE_SPIKE(new, avg)) {
316670eb 1320 decay += 5;
0a7de745 1321 }
316670eb
A
1322 avg = (((avg << decay) - avg) + new) >> decay;
1323 } else {
1324 avg = new;
1325 }
1326 sp->sfb_stats.dequeue_avg = avg;
1327 }
1328 *(&sp->sfb_getqtime) = *(&now);
1329 }
1330
fe8ab488
A
1331 if (!purge && SFB_QUEUE_DELAYBASED(sp)) {
1332 u_int64_t dequeue_ns, queue_delay = 0;
1333 net_timernsec(&now, &dequeue_ns);
0a7de745 1334 if (dequeue_ns > *pkt_timestamp) {
5ba3f43e 1335 queue_delay = dequeue_ns - *pkt_timestamp;
0a7de745 1336 }
fe8ab488
A
1337
1338 if (sp->sfb_min_qdelay == 0 ||
0a7de745 1339 (queue_delay > 0 && queue_delay < sp->sfb_min_qdelay)) {
fe8ab488 1340 sp->sfb_min_qdelay = queue_delay;
0a7de745 1341 }
fe8ab488
A
1342 if (net_timercmp(&now, &sp->sfb_update_time, >=)) {
1343 if (sp->sfb_min_qdelay > sp->sfb_target_qdelay) {
0a7de745 1344 if (!SFB_IS_DELAYHIGH(sp)) {
fe8ab488 1345 SFB_SET_DELAY_HIGH(sp, q);
0a7de745 1346 }
fe8ab488
A
1347 } else {
1348 sp->sfb_flags &= ~(SFBF_DELAYHIGH);
1349 sp->sfb_fc_threshold = 0;
fe8ab488
A
1350 }
1351 net_timeradd(&now, &sp->sfb_update_interval,
1352 &sp->sfb_update_time);
1353 sp->sfb_min_qdelay = 0;
1354 }
1355 }
5ba3f43e 1356 *pkt_timestamp = 0;
fe8ab488 1357
316670eb
A
1358 /*
1359 * Clearpkts are the ones which were in the queue when the hash
1360 * function was perturbed. Since the perturbation value (fudge),
1361 * and thus bin information for these packets is not known, we do
1362 * not change accounting information while dequeuing these packets.
1363 * It is important not to set the hash interval too small due to
1364 * this reason. A rule of thumb is to set it to K*D, where D is
1365 * the time taken to drain queue.
1366 */
5ba3f43e
A
1367 if (*pkt_sfb_flags & SFB_PKT_PBOX) {
1368 *pkt_sfb_flags &= ~SFB_PKT_PBOX;
0a7de745 1369 if (sp->sfb_clearpkts > 0) {
316670eb 1370 sp->sfb_clearpkts--;
0a7de745 1371 }
316670eb
A
1372 } else if (sp->sfb_clearpkts > 0) {
1373 sp->sfb_clearpkts--;
1374 } else {
5ba3f43e
A
1375 sfb_dq_update_bins(sp, *pkt_sfb_hash, pktsched_get_pkt_len(pkt),
1376 &now, qsize(q));
fe8ab488
A
1377 }
1378
cb323159
A
1379 switch (p.cp_ptype) {
1380 case QP_MBUF:
1381 /* See comments in <rdar://problem/14040693> */
5ba3f43e 1382 *pkt_flags &= ~PKTF_PRIV_GUARDED;
cb323159
A
1383 break;
1384 default:
1385 VERIFY(0);
1386 /* NOTREACHED */
1387 __builtin_unreachable();
0a7de745 1388 }
fe8ab488
A
1389
1390 /*
1391 * If the queue becomes empty before the update interval, reset
1392 * the flow control threshold
1393 */
1394 if (qsize(q) == 0) {
1395 sp->sfb_flags &= ~SFBF_DELAYHIGH;
1396 sp->sfb_min_qdelay = 0;
1397 sp->sfb_fc_threshold = 0;
1398 net_timerclear(&sp->sfb_update_time);
3e170ce0 1399 net_timerclear(&sp->sfb_getqtime);
316670eb 1400 }
cb323159 1401 return pkt->pktsched_pkt_mbuf;
316670eb
A
1402}
1403
5ba3f43e
A
1404void
1405sfb_getq(struct sfb *sp, class_queue_t *q, pktsched_pkt_t *pkt)
316670eb 1406{
5ba3f43e 1407 sfb_getq_flow(sp, q, 0, FALSE, pkt);
316670eb
A
1408}
1409
1410void
1411sfb_purgeq(struct sfb *sp, class_queue_t *q, u_int32_t flow, u_int32_t *packets,
1412 u_int32_t *bytes)
1413{
1414 u_int32_t cnt = 0, len = 0;
5ba3f43e 1415 pktsched_pkt_t pkt;
316670eb
A
1416
1417 IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd);
5ba3f43e 1418 while (sfb_getq_flow(sp, q, flow, TRUE, &pkt) != NULL) {
316670eb 1419 cnt++;
5ba3f43e
A
1420 len += pktsched_get_pkt_len(&pkt);
1421 pktsched_free_pkt(&pkt);
316670eb
A
1422 }
1423
0a7de745 1424 if (packets != NULL) {
316670eb 1425 *packets = cnt;
0a7de745
A
1426 }
1427 if (bytes != NULL) {
316670eb 1428 *bytes = len;
0a7de745 1429 }
316670eb
A
1430}
1431
1432void
1433sfb_updateq(struct sfb *sp, cqev_t ev)
1434{
1435 struct ifnet *ifp = sp->sfb_ifp;
1436
1437 VERIFY(ifp != NULL);
1438
1439 switch (ev) {
39236c6e 1440 case CLASSQ_EV_LINK_BANDWIDTH: {
316670eb
A
1441 u_int64_t eff_rate = ifnet_output_linkrate(ifp);
1442
1443 /* update parameters only if rate has changed */
0a7de745 1444 if (eff_rate == sp->sfb_eff_rate) {
316670eb 1445 break;
0a7de745 1446 }
316670eb
A
1447
1448 if (classq_verbose) {
1449 log(LOG_DEBUG, "%s: SFB qid=%d, adapting to new "
1450 "eff_rate=%llu bps\n", if_name(ifp), sp->sfb_qid,
1451 eff_rate);
1452 }
1453 sfb_calc_holdtime(sp, eff_rate);
1454 sfb_calc_pboxtime(sp, eff_rate);
39037602 1455 ifclassq_calc_target_qdelay(ifp, &sp->sfb_target_qdelay);
fe8ab488 1456 sfb_calc_update_interval(sp, eff_rate);
316670eb
A
1457 break;
1458 }
1459
1460 case CLASSQ_EV_LINK_UP:
1461 case CLASSQ_EV_LINK_DOWN:
1462 if (classq_verbose) {
1463 log(LOG_DEBUG, "%s: SFB qid=%d, resetting due to "
1464 "link %s\n", if_name(ifp), sp->sfb_qid,
1465 (ev == CLASSQ_EV_LINK_UP) ? "UP" : "DOWN");
1466 }
1467 sfb_resetq(sp, ev);
1468 break;
1469
39236c6e 1470 case CLASSQ_EV_LINK_LATENCY:
316670eb
A
1471 case CLASSQ_EV_LINK_MTU:
1472 default:
1473 break;
1474 }
1475}
1476
1477int
1478sfb_suspendq(struct sfb *sp, class_queue_t *q, boolean_t on)
1479{
1480#pragma unused(q)
1481 struct ifnet *ifp = sp->sfb_ifp;
1482
1483 VERIFY(ifp != NULL);
1484
1485 if ((on && (sp->sfb_flags & SFBF_SUSPENDED)) ||
0a7de745
A
1486 (!on && !(sp->sfb_flags & SFBF_SUSPENDED))) {
1487 return 0;
1488 }
316670eb
A
1489
1490 if (!(sp->sfb_flags & SFBF_FLOWCTL)) {
1491 log(LOG_ERR, "%s: SFB qid=%d, unable to %s queue since "
1492 "flow-control is not enabled", if_name(ifp), sp->sfb_qid,
1493 (on ? "suspend" : "resume"));
0a7de745 1494 return ENOTSUP;
316670eb
A
1495 }
1496
1497 if (classq_verbose) {
1498 log(LOG_DEBUG, "%s: SFB qid=%d, setting state to %s",
1499 if_name(ifp), sp->sfb_qid, (on ? "SUSPENDED" : "RUNNING"));
1500 }
1501
1502 if (on) {
1503 sp->sfb_flags |= SFBF_SUSPENDED;
1504 } else {
1505 sp->sfb_flags &= ~SFBF_SUSPENDED;
1506 sfb_swap_bins(sp, qlen(q));
1507 }
1508
0a7de745 1509 return 0;
316670eb 1510}