/*
 * Copyright (c) 2016-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/kauth.h>
#include <kern/zalloc.h>
#include <netinet/in.h>

#include <net/classq/classq.h>
#include <net/classq/if_classq.h>
#include <net/pktsched/pktsched.h>
#include <net/pktsched/pktsched_fq_codel.h>
#include <net/classq/classq_fq_codel.h>
static uint32_t flowq_size;                     /* size of flowq */
static struct mcache *flowq_cache = NULL;       /* mcache for flowq */

#define FQ_ZONE_MAX     (32 * 1024)     /* across all interfaces */

#define DTYPE_NODROP    0       /* no drop */
#define DTYPE_FORCED    1       /* a "forced" drop */
#define DTYPE_EARLY     2       /* an "unforced" (early) drop */
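/*
 * Bring-up for the global flowq cache. Illustrative pairing of the
 * routines below (simplified; the real call sites are fq_if_hash_pkt()
 * and fq_if_destroy_flow() in pktsched_fq_codel.c):
 *
 *      fq_codel_init();
 *      fq_t *fq = fq_alloc(QP_MBUF);
 *      ...
 *      fq_destroy(fq);
 *
 * fq_codel_init() is idempotent: the cache is created only on first
 * call, and the kernel panics if it cannot be created.
 */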
void
fq_codel_init(void)
{
        if (flowq_cache != NULL) {
                return;
        }

        flowq_size = sizeof(fq_t);
        flowq_cache = mcache_create("fq.flowq", flowq_size, sizeof(uint64_t),
            0, MCR_SLEEP);
        if (flowq_cache == NULL) {
                panic("%s: failed to allocate flowq_cache", __func__);
                /* NOTREACHED */
                __builtin_unreachable();
        }
}
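/*
 * Memory pressure hook: trim the flowq cache, or drain it outright when
 * 'purge' is set.
 */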
void
fq_codel_reap_caches(boolean_t purge)
{
        mcache_reap_now(flowq_cache, purge);
}
fq_t *
fq_alloc(classq_pkt_type_t ptype)
{
        fq_t *fq = NULL;

        fq = mcache_alloc(flowq_cache, MCR_SLEEP);
        if (fq == NULL) {
                log(LOG_ERR, "%s: unable to allocate from flowq_cache\n",
                    __func__);
                return NULL;
        }

        bzero(fq, flowq_size);
        fq->fq_ptype = ptype;
        if (ptype == QP_MBUF) {
                MBUFQ_INIT(&fq->fq_mbufq);
        }
        return fq;
}
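/*
 * Free a flowq back to the cache. The VERIFYs enforce the invariant
 * that only empty, inactive queues (not on the new- or old-flows lists,
 * zero queued bytes) are ever destroyed.
 */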
void
fq_destroy(fq_t *fq)
{
        VERIFY(fq_empty(fq));
        VERIFY(!(fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)));
        VERIFY(fq->fq_bytes == 0);
        mcache_free(flowq_cache, fq);
}
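/*
 * Enqueue-side stall detector. If a flow holding at least
 * FQ_MIN_FC_THRESHOLD_BYTES has not been dequeued for a full update
 * interval, mark it delay-high so that flow control / early drops can
 * engage even though the dequeue path (which normally maintains the
 * delay state) is not running.
 */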
static void
fq_detect_dequeue_stall(fq_if_t *fqs, fq_t *flowq, fq_if_classq_t *fq_cl,
    u_int64_t *now)
{
        u_int64_t maxgetqtime;

        if (FQ_IS_DELAYHIGH(flowq) || flowq->fq_getqtime == 0 ||
            fq_empty(flowq) ||
            flowq->fq_bytes < FQ_MIN_FC_THRESHOLD_BYTES) {
                return;
        }
        maxgetqtime = flowq->fq_getqtime + fqs->fqs_update_interval;
        if ((*now) > maxgetqtime) {
                /*
                 * there was no dequeue in an update interval worth of
                 * time. It means that the queue is stalled.
                 */
                FQ_SET_DELAY_HIGH(flowq);
                fq_cl->fcl_stat.fcl_dequeue_stall++;
        }
}
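/*
 * Drop a single packet from the head of the flow queue. Head drops are
 * preferred over tail drops here: the oldest packet goes, which frees
 * capacity for fresher traffic and lets the sender notice the loss
 * sooner.
 */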
void
fq_head_drop(fq_if_t *fqs, fq_t *fq)
{
        pktsched_pkt_t pkt;
        volatile uint32_t *pkt_flags;
        uint64_t *pkt_timestamp;
        struct ifclassq *ifq = fqs->fqs_ifq;

        _PKTSCHED_PKT_INIT(&pkt);
        fq_getq_flow_internal(fqs, fq, &pkt);
        if (pkt.pktsched_pkt_mbuf == NULL) {
                return;
        }

        pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
            NULL, NULL);

        *pkt_timestamp = 0;
        switch (pkt.pktsched_ptype) {
        case QP_MBUF:
                *pkt_flags &= ~PKTF_PRIV_GUARDED;
                break;
        default:
                VERIFY(0);
                /* NOTREACHED */
                __builtin_unreachable();
        }

        IFCQ_DROP_ADD(ifq, 1, pktsched_get_pkt_len(&pkt));
        IFCQ_CONVERT_LOCK(ifq);
        pktsched_free_pkt(&pkt);
}
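/*
 * Enqueue entry point for FQ-CoDel. In outline:
 *
 *  1. Timestamp the packet (or reuse an existing timestamp) so the
 *     dequeue path can measure queueing delay.
 *  2. Hash the packet to its flowq, allocating one on demand.
 *  3. If the flow is delay-high, either arm a flow control advisory
 *     (flow-advisory-capable TCP/QUIC flows) or drop early, preferring
 *     a head drop over dropping the fresh packet.
 *  4. At the scheduler drop limit, drop from the largest flow so heavy
 *     and unresponsive flows bear the loss.
 *  5. Otherwise enqueue, and activate an idle flow on the new-flows
 *     list with a full DRR deficit (fcl_quantum).
 */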
int
fq_addq(fq_if_t *fqs, pktsched_pkt_t *pkt, fq_if_classq_t *fq_cl)
{
        int droptype = DTYPE_NODROP, fc_adv = 0, ret = CLASSQEQ_SUCCESS;
        u_int64_t now;
        fq_t *fq = NULL;
        uint64_t *pkt_timestamp;
        volatile uint32_t *pkt_flags;
        uint32_t pkt_flowid, pkt_tx_start_seq;
        uint8_t pkt_proto, pkt_flowsrc;

        pktsched_get_pkt_vars(pkt, &pkt_flags, &pkt_timestamp, &pkt_flowid,
            &pkt_flowsrc, &pkt_proto, &pkt_tx_start_seq);

        switch (pkt->pktsched_ptype) {
        case QP_MBUF:
                /* See comments in <rdar://problem/14040693> */
                VERIFY(!(*pkt_flags & PKTF_PRIV_GUARDED));
                *pkt_flags |= PKTF_PRIV_GUARDED;
                break;
        default:
                VERIFY(0);
                /* NOTREACHED */
                __builtin_unreachable();
        }

        if (*pkt_timestamp > 0) {
                now = *pkt_timestamp;
        } else {
                struct timespec now_ts;
                nanouptime(&now_ts);
                now = (now_ts.tv_sec * NSEC_PER_SEC) + now_ts.tv_nsec;
                *pkt_timestamp = now;
        }

        /* find the flowq for this packet */
        fq = fq_if_hash_pkt(fqs, pkt_flowid, pktsched_get_pkt_svc(pkt),
            now, TRUE, pkt->pktsched_ptype);
        if (fq == NULL) {
                /* drop the packet if we could not allocate a flow queue */
                fq_cl->fcl_stat.fcl_drop_memfailure++;
                IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
                return CLASSQEQ_DROP;
        }
        VERIFY(fq->fq_ptype == pkt->pktsched_ptype);

        fq_detect_dequeue_stall(fqs, fq, fq_cl, &now);

        if (FQ_IS_DELAYHIGH(fq)) {
                if ((fq->fq_flags & FQF_FLOWCTL_CAPABLE) &&
                    (*pkt_flags & PKTF_FLOW_ADV)) {
                        fc_adv = 1;
                        /*
                         * If the flow is suspended or it is not
                         * TCP/QUIC, drop the packet
                         */
                        if ((pkt_proto != IPPROTO_TCP) &&
                            (pkt_proto != IPPROTO_QUIC)) {
                                droptype = DTYPE_EARLY;
                                fq_cl->fcl_stat.fcl_drop_early++;
                        }
                } else {
                        /*
                         * Need to drop a packet, instead of dropping this
                         * one, try to drop from the head of the queue
                         */
                        if (!fq_empty(fq)) {
                                fq_head_drop(fqs, fq);
                                droptype = DTYPE_NODROP;
                        } else {
                                droptype = DTYPE_EARLY;
                        }
                        fq_cl->fcl_stat.fcl_drop_early++;
                }
        }

        /* Set the return code correctly */
        if (fc_adv == 1 && droptype != DTYPE_FORCED) {
                if (fq_if_add_fcentry(fqs, pkt, pkt_flowid, pkt_flowsrc,
                    fq_cl)) {
                        fq->fq_flags |= FQF_FLOWCTL_ON;
                        /* deliver flow control advisory error */
                        if (droptype == DTYPE_NODROP) {
                                ret = CLASSQEQ_SUCCESS_FC;
                        } else {
                                /* dropped due to flow control */
                                ret = CLASSQEQ_DROP_FC;
                        }
                } else {
                        /*
                         * if we could not flow control the flow, it is
                         * better to drop
                         */
                        droptype = DTYPE_FORCED;
                        ret = CLASSQEQ_DROP_FC;
                        fq_cl->fcl_stat.fcl_flow_control_fail++;
                }
        }

        /*
         * If the queue length hits the queue limit, drop a packet from the
         * front of the queue for a flow with maximum number of bytes. This
         * will penalize heavy and unresponsive flows. It will also avoid a
         * tail drop.
         */
        if (droptype == DTYPE_NODROP && fq_if_at_drop_limit(fqs)) {
                if (fqs->fqs_large_flow == fq) {
                        /*
                         * Drop from the head of the current fq. Since a
                         * new packet will be added to the tail, it is ok
                         * to leave fq in place.
                         */
                        fq_head_drop(fqs, fq);
                } else {
                        if (fqs->fqs_large_flow == NULL) {
                                droptype = DTYPE_FORCED;
                                fq_cl->fcl_stat.fcl_drop_overflow++;
                                ret = CLASSQEQ_DROP;

                                /*
                                 * if this fq was freshly created and there
                                 * is nothing to enqueue, free it
                                 */
                                if (fq_empty(fq) && !(fq->fq_flags &
                                    (FQF_NEW_FLOW | FQF_OLD_FLOW))) {
                                        fq_if_destroy_flow(fqs, fq_cl, fq);
                                        fq = NULL;
                                }
                        } else {
                                fq_if_drop_packet(fqs);
                        }
                }
        }

        if (droptype == DTYPE_NODROP) {
                uint32_t pkt_len = pktsched_get_pkt_len(pkt);
                fq_enqueue(fq, pkt->pktsched_pkt);
                fq->fq_bytes += pkt_len;
                fq_cl->fcl_stat.fcl_byte_cnt += pkt_len;
                fq_cl->fcl_stat.fcl_pkt_cnt++;

                /*
                 * check if this queue will qualify to be the next
                 * victim queue
                 */
                fq_if_is_flow_heavy(fqs, fq);
        } else {
                IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
                return (ret != CLASSQEQ_SUCCESS) ? ret : CLASSQEQ_DROP;
        }

        /*
         * If the queue is not currently active, add it to the end of new
         * flows list for that service class.
         */
        if ((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) == 0) {
                VERIFY(STAILQ_NEXT(fq, fq_actlink) == NULL);
                STAILQ_INSERT_TAIL(&fq_cl->fcl_new_flows, fq, fq_actlink);
                fq->fq_flags |= FQF_NEW_FLOW;

                fq_cl->fcl_stat.fcl_newflows_cnt++;

                fq->fq_deficit = fq_cl->fcl_quantum;
        }
        return ret;
}
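/*
 * Low-level dequeue: pull one packet off the flowq and fix up the
 * per-flow, per-class and interface counters, without touching the
 * CoDel delay accounting. Shared by the normal dequeue path
 * (fq_getq_flow) and the head-drop path (fq_head_drop).
 */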
void
fq_getq_flow_internal(fq_if_t *fqs, fq_t *fq, pktsched_pkt_t *pkt)
{
        classq_pkt_t p = CLASSQ_PKT_INITIALIZER(p);
        uint32_t plen;
        fq_if_classq_t *fq_cl;
        struct ifclassq *ifq = fqs->fqs_ifq;

        fq_dequeue(fq, &p);
        if (p.cp_ptype == QP_INVALID) {
                VERIFY(p.cp_mbuf == NULL);
                return;
        }

        pktsched_pkt_encap(pkt, &p);
        plen = pktsched_get_pkt_len(pkt);

        VERIFY(fq->fq_bytes >= plen);
        fq->fq_bytes -= plen;

        fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
        fq_cl->fcl_stat.fcl_byte_cnt -= plen;
        fq_cl->fcl_stat.fcl_pkt_cnt--;
        IFCQ_DEC_LEN(ifq);
        IFCQ_DEC_BYTES(ifq, plen);

        /* Reset getqtime so that we don't count idle times */
        if (fq_empty(fq)) {
                fq->fq_getqtime = 0;
        }
}
void
fq_getq_flow(fq_if_t *fqs, fq_t *fq, pktsched_pkt_t *pkt)
{
        fq_if_classq_t *fq_cl;
        u_int64_t now;
        int64_t qdelay = 0;
        struct timespec now_ts;
        volatile uint32_t *pkt_flags;
        uint32_t pkt_tx_start_seq;
        uint64_t *pkt_timestamp;

        fq_getq_flow_internal(fqs, fq, pkt);
        if (pkt->pktsched_ptype == QP_INVALID) {
                VERIFY(pkt->pktsched_pkt_mbuf == NULL);
                return;
        }

        pktsched_get_pkt_vars(pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
            NULL, &pkt_tx_start_seq);

        nanouptime(&now_ts);
        now = (now_ts.tv_sec * NSEC_PER_SEC) + now_ts.tv_nsec;

        /* this will compute qdelay in nanoseconds */
        if (now > *pkt_timestamp) {
                qdelay = now - *pkt_timestamp;
        }
        fq_cl = &fqs->fqs_classq[fq->fq_sc_index];

        if (fq->fq_min_qdelay == 0 ||
            (qdelay > 0 && (u_int64_t)qdelay < fq->fq_min_qdelay)) {
                fq->fq_min_qdelay = qdelay;
        }
        if (now >= fq->fq_updatetime) {
                if (fq->fq_min_qdelay > fqs->fqs_target_qdelay) {
                        if (!FQ_IS_DELAYHIGH(fq)) {
                                FQ_SET_DELAY_HIGH(fq);
                        }
                } else {
                        FQ_CLEAR_DELAY_HIGH(fq);
                }
                /* Reset measured queue delay and update time */
                fq->fq_updatetime = now + fqs->fqs_update_interval;
                fq->fq_min_qdelay = 0;
        }
        if (!FQ_IS_DELAYHIGH(fq) || fq_empty(fq)) {
                FQ_CLEAR_DELAY_HIGH(fq);
                if (fq->fq_flags & FQF_FLOWCTL_ON) {
                        fq_if_flow_feedback(fqs, fq, fq_cl);
                }
        }

        if (fq_empty(fq)) {
                /* Reset getqtime so that we don't count idle times */
                fq->fq_getqtime = 0;
        } else {
                fq->fq_getqtime = now;
        }
        fq_if_is_flow_heavy(fqs, fq);

        *pkt_timestamp = 0;
        switch (pkt->pktsched_ptype) {
        case QP_MBUF:
                *pkt_flags &= ~PKTF_PRIV_GUARDED;
                break;
        default:
                VERIFY(0);
                /* NOTREACHED */
                __builtin_unreachable();
        }
}