X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/4d15aeb193b2c68f1d38666c317f8d3734f5f083..5ba3f43ea354af8ad55bea84372a2bc834d8757c:/bsd/net/classq/classq_red.c?ds=inline diff --git a/bsd/net/classq/classq_red.c b/bsd/net/classq/classq_red.c deleted file mode 100644 index 63cbd8ede..000000000 --- a/bsd/net/classq/classq_red.c +++ /dev/null @@ -1,630 +0,0 @@ -/* - * Copyright (c) 2007-2012 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* $OpenBSD: altq_red.c,v 1.14 2007/09/13 20:40:02 chl Exp $ */ -/* $KAME: altq_red.c,v 1.10 2002/04/03 05:38:51 kjc Exp $ */ - -/* - * Copyright (C) 1997-2003 - * Sony Computer Science Laboratories Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - */ -/* - * Copyright (c) 1990-1994 Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the Computer Systems - * Engineering Group at Lawrence Berkeley Laboratory. - * 4. Neither the name of the University nor of the Laboratory may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include - -#if CLASSQ_RED - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#if INET6 -#include -#endif - -#include -#include - -/* - * ALTQ/RED (Random Early Detection) implementation using 32-bit - * fixed-point calculation. - * - * written by kjc using the ns code as a reference. - * you can learn more about red and ns from Sally's home page at - * http://www-nrg.ee.lbl.gov/floyd/ - * - * most of the red parameter values are fixed in this implementation - * to prevent fixed-point overflow/underflow. - * if you change the parameters, watch out for overflow/underflow! - * - * the parameters used are recommended values by Sally. - * the corresponding ns config looks: - * q_weight=0.00195 - * minthresh=5 maxthresh=15 queue-size=60 - * linterm=30 - * dropmech=drop-tail - * bytes=false (can't be handled by 32-bit fixed-point) - * doubleq=false dqthresh=false - * wait=true - */ -/* - * alternative red parameters for a slow link. - * - * assume the queue length becomes from zero to L and keeps L, it takes - * N packets for q_avg to reach 63% of L. - * when q_weight is 0.002, N is about 500 packets. - * for a slow link like dial-up, 500 packets takes more than 1 minute! - * when q_weight is 0.008, N is about 127 packets. - * when q_weight is 0.016, N is about 63 packets. - * bursts of 50 packets are allowed for 0.002, bursts of 25 packets - * are allowed for 0.016. - * see Sally's paper for more details. - */ -/* normal red parameters */ -#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ - /* q_weight = 0.00195 */ - -/* red parameters for a slow link */ -#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ - /* q_weight = 0.0078125 */ - -/* red parameters for a very slow link (e.g., dialup) */ -#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ - /* q_weight = 0.015625 */ - -/* fixed-point uses 12-bit decimal places */ -#define FP_SHIFT 12 /* fixed-point shift */ - -/* red parameters for drop probability */ -#define INV_P_MAX 10 /* inverse of max drop probability */ -#define TH_MIN 5 /* min threshold */ -#define TH_MAX 15 /* max threshold */ - -#define RED_LIMIT 60 /* default max queue lenght */ - -#define RED_ZONE_MAX 32 /* maximum elements in zone */ -#define RED_ZONE_NAME "classq_red" /* zone name */ - -static unsigned int red_size; /* size of zone element */ -static struct zone *red_zone; /* zone for red */ - -/* - * our default policy for forced-drop is drop-tail. - * (in altq-1.1.2 or earlier, the default was random-drop. - * but it makes more sense to punish the cause of the surge.) - * to switch to the random-drop policy, define "RED_RANDOM_DROP". - */ - -/* default red parameter values */ -static int default_th_min = TH_MIN; -static int default_th_max = TH_MAX; -static int default_inv_pmax = INV_P_MAX; - -static struct mbuf *red_getq_flow(struct red *, class_queue_t *, - u_int32_t, boolean_t); - -void -red_init(void) -{ - _CASSERT(REDF_ECN4 == CLASSQF_ECN4); - _CASSERT(REDF_ECN6 == CLASSQF_ECN6); - - red_size = sizeof (red_t); - red_zone = zinit(red_size, RED_ZONE_MAX * red_size, - 0, RED_ZONE_NAME); - if (red_zone == NULL) { - panic("%s: failed allocating %s", __func__, RED_ZONE_NAME); - /* NOTREACHED */ - } - zone_change(red_zone, Z_EXPAND, TRUE); - zone_change(red_zone, Z_CALLERACCT, TRUE); -} - -/* - * red support routines - */ -red_t * -red_alloc(struct ifnet *ifp, int weight, int inv_pmax, int th_min, - int th_max, int flags, int pkttime) -{ - red_t *rp; - int w, i; - int npkts_per_sec; - - VERIFY(ifp != NULL); - - rp = zalloc(red_zone); - if (rp == NULL) - return (NULL); - - bzero(rp, red_size); - rp->red_avg = 0; - rp->red_idle = 1; - - if (weight == 0) - rp->red_weight = W_WEIGHT; - else - rp->red_weight = weight; - if (inv_pmax == 0) - rp->red_inv_pmax = default_inv_pmax; - else - rp->red_inv_pmax = inv_pmax; - if (th_min == 0) - rp->red_thmin = default_th_min; - else - rp->red_thmin = th_min; - if (th_max == 0) - rp->red_thmax = default_th_max; - else - rp->red_thmax = th_max; - - rp->red_ifp = ifp; - rp->red_flags = (flags & REDF_USERFLAGS); -#if !PF_ECN - if (rp->red_flags & REDF_ECN) { - rp->red_flags &= ~REDF_ECN; - log(LOG_ERR, "%s: RED ECN not available; ignoring " - "REDF_ECN flag!\n", if_name(ifp)); - } -#endif /* !PF_ECN */ - - if (pkttime == 0) - /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ - rp->red_pkttime = 800; - else - rp->red_pkttime = pkttime; - - if (weight == 0) { - /* when the link is very slow, adjust red parameters */ - npkts_per_sec = 1000000 / rp->red_pkttime; - if (npkts_per_sec < 50) { - /* up to about 400Kbps */ - rp->red_weight = W_WEIGHT_2; - } else if (npkts_per_sec < 300) { - /* up to about 2.4Mbps */ - rp->red_weight = W_WEIGHT_1; - } - } - - /* calculate wshift. weight must be power of 2 */ - w = rp->red_weight; - for (i = 0; w > 1; i++) - w = w >> 1; - rp->red_wshift = i; - w = 1 << rp->red_wshift; - if (w != rp->red_weight) { - printf("invalid weight value %d for red! use %d\n", - rp->red_weight, w); - rp->red_weight = w; - } - - /* - * thmin_s and thmax_s are scaled versions of th_min and th_max - * to be compared with avg. - */ - rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); - rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); - - /* - * precompute probability denominator - * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point - */ - rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) * - rp->red_inv_pmax) << FP_SHIFT; - - /* allocate weight table */ - rp->red_wtab = wtab_alloc(rp->red_weight); - if (rp->red_wtab == NULL) { - red_destroy(rp); - return (NULL); - } - - microuptime(&rp->red_last); - return (rp); -} - -void -red_destroy(red_t *rp) -{ - if (rp->red_wtab != NULL) { - wtab_destroy(rp->red_wtab); - rp->red_wtab = NULL; - } - zfree(red_zone, rp); -} - -void -red_getstats(red_t *rp, struct red_stats *sp) -{ - sp->q_avg = rp->red_avg >> rp->red_wshift; - sp->drop_forced = rp->red_stats.drop_forced; - sp->drop_unforced = rp->red_stats.drop_unforced; - sp->marked_packets = rp->red_stats.marked_packets; -} - -int -red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, struct pf_mtag *tag) -{ -#if !PF_ECN -#pragma unused(tag) -#endif /* !PF_ECN */ - int avg, droptype; - int n; - - avg = rp->red_avg; - - /* - * if we were idle, we pretend that n packets arrived during - * the idle period. - */ - if (rp->red_idle) { - struct timeval now; - int t; - - rp->red_idle = 0; - microuptime(&now); - t = (now.tv_sec - rp->red_last.tv_sec); - if (t > 60) { - /* - * being idle for more than 1 minute, set avg to zero. - * this prevents t from overflow. - */ - avg = 0; - } else { - t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); - n = t / rp->red_pkttime - 1; - - /* the following line does (avg = (1 - Wq)^n * avg) */ - if (n > 0) - avg = (avg >> FP_SHIFT) * - pow_w(rp->red_wtab, n); - } - } - - /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */ - avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); - rp->red_avg = avg; /* save the new value */ - - /* - * red_count keeps a tally of arriving traffic that has not - * been dropped. - */ - rp->red_count++; - - /* see if we drop early */ - droptype = DTYPE_NODROP; - if (avg >= rp->red_thmin_s && qlen(q) > 1) { - if (avg >= rp->red_thmax_s) { - /* avg >= th_max: forced drop */ - droptype = DTYPE_FORCED; - } else if (rp->red_old == 0) { - /* first exceeds th_min */ - rp->red_count = 1; - rp->red_old = 1; - } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, - rp->red_probd, rp->red_count)) { - /* mark or drop by red */ -#if PF_ECN - if ((rp->red_flags & REDF_ECN) && - (tag->pftag_proto == IPPROTO_TCP) && /* only TCP */ - mark_ecn(m, tag, rp->red_flags)) { - /* successfully marked. do not drop. */ - rp->red_count = 0; - rp->red_stats.marked_packets++; - } else -#endif /* PF_ECN */ - { - /* unforced drop by red */ - droptype = DTYPE_EARLY; - } - } - } else { - /* avg < th_min */ - rp->red_old = 0; - } - - /* - * if the queue length hits the hard limit, it's a forced drop. - */ - if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) - droptype = DTYPE_FORCED; - -#ifdef RED_RANDOM_DROP - /* if successful or forced drop, enqueue this packet. */ - if (droptype != DTYPE_EARLY) - _addq(q, m); -#else - /* if successful, enqueue this packet. */ - if (droptype == DTYPE_NODROP) - _addq(q, m); -#endif - if (droptype != DTYPE_NODROP) { - if (droptype == DTYPE_EARLY) { - /* drop the incoming packet */ - rp->red_stats.drop_unforced++; - } else { - /* forced drop, select a victim packet in the queue. */ -#ifdef RED_RANDOM_DROP - m = _getq_random(q); -#endif - rp->red_stats.drop_forced++; - } - rp->red_count = 0; - IFCQ_CONVERT_LOCK(&rp->red_ifp->if_snd); - m_freem(m); - return (CLASSQEQ_DROPPED); - } - /* successfully queued */ - return (CLASSQEQ_SUCCESS); -} - -/* - * early-drop probability is calculated as follows: - * prob = p_max * (avg - th_min) / (th_max - th_min) - * prob_a = prob / (2 - count*prob) - * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min)) - * here prob_a increases as successive undrop count increases. - * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)), - * becomes 1 when (count >= (2 / prob))). - */ -int -drop_early(int fp_len, int fp_probd, int count) -{ - int d; /* denominator of drop-probability */ - - d = fp_probd - count * fp_len; - if (d <= 0) - /* count exceeds the hard limit: drop or mark */ - return (1); - - /* - * now the range of d is [1..600] in fixed-point. (when - * th_max-th_min=10 and p_max=1/30) - * drop probability = (avg - TH_MIN) / d - */ - - if ((RandomULong() % d) < (unsigned)fp_len) { - /* drop or mark */ - return (1); - } - /* no drop/mark */ - return (0); -} - -static struct mbuf * -red_getq_flow(struct red *rp, class_queue_t *q, u_int32_t flow, boolean_t purge) -{ -#pragma unused(purge) - struct mbuf *m; - - /* flow of 0 means head of queue */ - if ((m = ((flow == 0) ? _getq(q) : _getq_flow(q, flow))) == NULL) { - if (rp->red_idle == 0) { - rp->red_idle = 1; - microuptime(&rp->red_last); - } - return (NULL); - } - - rp->red_idle = 0; - return (m); -} - -struct mbuf * -red_getq(red_t *rp, class_queue_t *q) -{ - return (red_getq_flow(rp, q, 0, FALSE)); -} - -void -red_purgeq(struct red *rp, class_queue_t *q, u_int32_t flow, u_int32_t *packets, - u_int32_t *bytes) -{ - u_int32_t cnt = 0, len = 0; - struct mbuf *m; - - IFCQ_CONVERT_LOCK(&rp->red_ifp->if_snd); - - while ((m = red_getq_flow(rp, q, flow, TRUE)) != NULL) { - cnt++; - len += m_pktlen(m); - m_freem(m); - } - - if (packets != NULL) - *packets = cnt; - if (bytes != NULL) - *bytes = len; -} - -void -red_updateq(red_t *rp, cqev_t ev) -{ -#pragma unused(rp, ev) - /* nothing for now */ -} - -int -red_suspendq(red_t *rp, class_queue_t *q, boolean_t on) -{ -#pragma unused(rp, q, on) - return (ENOTSUP); -} - -/* - * helper routine to calibrate avg during idle. - * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point - * here Wq = 1/weight and the code assumes Wq is close to zero. - * - * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. - */ -static struct wtab *wtab_list = NULL; /* pointer to wtab list */ - -struct wtab * -wtab_alloc(int weight) -{ - struct wtab *w; - int i; - - for (w = wtab_list; w != NULL; w = w->w_next) - if (w->w_weight == weight) { - w->w_refcount++; - return (w); - } - - w = _MALLOC(sizeof (struct wtab), M_DEVBUF, M_WAITOK|M_ZERO); - if (w == NULL) - return (NULL); - - w->w_weight = weight; - w->w_refcount = 1; - w->w_next = wtab_list; - wtab_list = w; - - /* initialize the weight table */ - w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; - for (i = 1; i < 32; i++) { - w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; - if (w->w_tab[i] == 0 && w->w_param_max == 0) - w->w_param_max = 1 << i; - } - - return (w); -} - -void -wtab_destroy(struct wtab *w) -{ - struct wtab *prev; - - if (--w->w_refcount > 0) - return; - - if (wtab_list == w) - wtab_list = w->w_next; - else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) - if (prev->w_next == w) { - prev->w_next = w->w_next; - break; - } - - _FREE(w, M_DEVBUF); -} - -int32_t -pow_w(struct wtab *w, int n) -{ - int i, bit; - int32_t val; - - if (n >= w->w_param_max) - return (0); - - val = 1 << FP_SHIFT; - if (n <= 0) - return (val); - - bit = 1; - i = 0; - while (n) { - if (n & bit) { - val = (val * w->w_tab[i]) >> FP_SHIFT; - n &= ~bit; - } - i++; - bit <<= 1; - } - return (val); -} - -#endif /* CLASSQ_RED */