]>
git.saurik.com Git - apple/xnu.git/blob - bsd/net/flowadv.c
/*
 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Flow Control and Feedback Advisory
 *
 * Each mbuf that is being sent out through an interface is tagged with a
 * unique 32-bit ID which will help to identify all the packets that belong
 * to a particular flow at the interface layer. Packets carrying such an ID
 * would need to be marked with PKTF_FLOW_ID. Normally, this ID is computed
 * by the module that generates the flow. There are 3 kinds of flow sources
 * that are currently recognized:
 *
 *	a. INPCB (INET/INET6 Protocol Control Block). When a socket is
 *	   connected, the flow hash for the socket is computed and stored in
 *	   the PCB. Further transmissions on the socket will cause the hash
 *	   value to be carried within the mbuf as the flow ID.
 *
 *	b. Interface. When an interface is attached, the flow hash for the
 *	   interface is computed and stored in the ifnet. This value is
 *	   normally ignored for most network drivers, except for those that
 *	   reside atop another driver, e.g. a virtual interface performing
 *	   encapsulation/encryption on the original packet and sending the
 *	   newly-generated packet to another interface. Such an interface
 *	   needs to associate all generated packets with the interface flow
 *	   hash value as the flow ID.
 *
 *	c. PF (Packet Filter). When a packet goes through PF and it is not
 *	   already associated with a flow ID, PF will compute a flow hash and
 *	   store it in the packet as flow ID. When the packet is associated
 *	   with a PF state, the state record will have the flow ID stored
 *	   within, in order to avoid recalculating the flow hash. Although PF
 *	   is capable of generating flow IDs, it does not participate in flow
 *	   advisory, and therefore packets whose IDs are computed by PF will
 *	   not have their PKTF_FLOW_ADV packet flag set.
 *
 * Activation of flow advisory mechanism is done by setting the PKTF_FLOW_ADV
 * packet flag; because a flow ID is required, the mechanism will not take
 * place unless PKTF_FLOW_ID is set as well. The packet must also carry one
 * of the flow source types FLOWSRC_{INPCB,IFNET} in order to identify where
 * the flow advisory notification should be delivered to. As noted above,
 * FLOWSRC_PF does not participate in this mechanism.
 *
 * The classq module configured on the interface is responsible for exerting
 * flow control to the upper layers. This occurs when the number of packets
 * queued for a flow reaches a limit. The module generating the flow will
 * cease transmission until further flow advisory notice, and the flow will
 * be inserted into the classq's flow control list.
 *
 * When packets are dequeued from the classq and the number of packets for
 * a flow goes below a limit, the classq will transfer its flow control list
 * to the global fadv_list. This will then trigger the flow advisory thread
 * to run, which will cause the flow source modules to be notified that data
 * can now be generated for those previously flow-controlled flows.
 */
82 #include <sys/param.h>
83 #include <sys/systm.h>
84 #include <sys/kernel.h>
85 #include <sys/mcache.h>
87 #include <sys/proc_internal.h>
88 #include <sys/socketvar.h>
90 #include <kern/assert.h>
91 #include <kern/thread.h>
92 #include <kern/locks.h>
93 #include <kern/zalloc.h>
95 #include <netinet/in_pcb.h>
96 #include <net/flowadv.h>
98 /* Lock group and attribute for fadv_lock */
99 static lck_grp_t
*fadv_lock_grp
;
100 static lck_grp_attr_t
*fadv_lock_grp_attr
;
101 decl_lck_mtx_data(static, fadv_lock
);
103 /* protected by fadv_lock */
104 static STAILQ_HEAD(fadv_head
, flowadv_fcentry
) fadv_list
;
105 static thread_t fadv_thread
= THREAD_NULL
;
106 static uint32_t fadv_active
;
108 static unsigned int fadv_zone_size
; /* size of flowadv_fcentry */
109 static struct zone
*fadv_zone
; /* zone for flowadv_fcentry */
111 #define FADV_ZONE_MAX 32 /* maximum elements in zone */
112 #define FADV_ZONE_NAME "fadv_zone" /* zone name */
114 static int flowadv_thread_cont(int);
115 static void flowadv_thread_func(void *, wait_result_t
);
120 STAILQ_INIT(&fadv_list
);
122 /* Setup lock group and attribute for fadv_lock */
123 fadv_lock_grp_attr
= lck_grp_attr_alloc_init();
124 fadv_lock_grp
= lck_grp_alloc_init("fadv_lock", fadv_lock_grp_attr
);
125 lck_mtx_init(&fadv_lock
, fadv_lock_grp
, NULL
);
127 fadv_zone_size
= P2ROUNDUP(sizeof (struct flowadv_fcentry
),
129 fadv_zone
= zinit(fadv_zone_size
,
130 FADV_ZONE_MAX
* fadv_zone_size
, 0, FADV_ZONE_NAME
);
131 if (fadv_zone
== NULL
) {
132 panic("%s: failed allocating %s", __func__
, FADV_ZONE_NAME
);
135 zone_change(fadv_zone
, Z_EXPAND
, TRUE
);
136 zone_change(fadv_zone
, Z_CALLERACCT
, FALSE
);
138 if (kernel_thread_start(flowadv_thread_func
, NULL
, &fadv_thread
) !=
140 panic("%s: couldn't create flow event advisory thread",
144 thread_deallocate(fadv_thread
);
147 struct flowadv_fcentry
*
148 flowadv_alloc_entry(int how
)
150 struct flowadv_fcentry
*fce
;
152 fce
= (how
== M_WAITOK
) ? zalloc(fadv_zone
) : zalloc_noblock(fadv_zone
);
154 bzero(fce
, fadv_zone_size
);
160 flowadv_free_entry(struct flowadv_fcentry
*fce
)
162 zfree(fadv_zone
, fce
);
166 flowadv_add(struct flowadv_fclist
*fcl
)
168 if (STAILQ_EMPTY(fcl
))
171 lck_mtx_lock_spin(&fadv_lock
);
173 STAILQ_CONCAT(&fadv_list
, fcl
);
174 VERIFY(!STAILQ_EMPTY(&fadv_list
));
176 if (!fadv_active
&& fadv_thread
!= THREAD_NULL
)
177 wakeup_one((caddr_t
)&fadv_list
);
179 lck_mtx_unlock(&fadv_lock
);
183 flowadv_add_entry(struct flowadv_fcentry
*fce
) {
184 lck_mtx_lock_spin(&fadv_lock
);
185 STAILQ_INSERT_HEAD(&fadv_list
, fce
, fce_link
);
186 VERIFY(!STAILQ_EMPTY(&fadv_list
));
188 if (!fadv_active
&& fadv_thread
!= THREAD_NULL
)
189 wakeup_one((caddr_t
)&fadv_list
);
191 lck_mtx_unlock(&fadv_lock
);
195 flowadv_thread_cont(int err
)
199 LCK_MTX_ASSERT(&fadv_lock
, LCK_MTX_ASSERT_OWNED
);
200 while (STAILQ_EMPTY(&fadv_list
)) {
201 VERIFY(!fadv_active
);
202 (void) msleep0(&fadv_list
, &fadv_lock
, (PSOCK
| PSPIN
),
203 "flowadv_cont", 0, flowadv_thread_cont
);
209 struct flowadv_fcentry
*fce
;
211 VERIFY(!STAILQ_EMPTY(&fadv_list
));
212 fce
= STAILQ_FIRST(&fadv_list
);
213 STAILQ_REMOVE(&fadv_list
, fce
,
214 flowadv_fcentry
, fce_link
);
215 STAILQ_NEXT(fce
, fce_link
) = NULL
;
217 lck_mtx_unlock(&fadv_lock
);
218 switch (fce
->fce_flowsrc_type
) {
220 inp_flowadv(fce
->fce_flowid
);
224 ifnet_flowadv(fce
->fce_flowid
);
232 flowadv_free_entry(fce
);
233 lck_mtx_lock_spin(&fadv_lock
);
235 /* if there's no pending request, we're done */
236 if (STAILQ_EMPTY(&fadv_list
))
244 flowadv_thread_func(void *v
, wait_result_t w
)
247 lck_mtx_lock(&fadv_lock
);
248 (void) msleep0(&fadv_list
, &fadv_lock
, (PSOCK
| PSPIN
),
249 "flowadv", 0, flowadv_thread_cont
);
251 * msleep0() shouldn't have returned as PCATCH was not set;
252 * therefore assert in this case.
254 lck_mtx_unlock(&fadv_lock
);