]>
Commit | Line | Data |
---|---|---|
39236c6e | 1 | /* |
f427ee49 | 2 | * Copyright (c) 2012-2020 Apple Inc. All rights reserved. |
39236c6e A |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | /* | |
30 | * Flow Control and Feedback Advisory | |
31 | * | |
32 | * Each mbuf that is being sent out through an interface is tagged with a | |
33 | * unique 32-bit ID which will help to identify all the packets that belong | |
34 | * to a particular flow at the interface layer. Packets carrying such an ID | |
35 | * would need to be marked with PKTF_FLOW_ID. Normally, this ID is computed | |
36 | * by the module that generates the flow. There are 3 kinds of flow sources | |
37 | * that are currently recognized: | |
38 | * | |
39 | * a. INPCB (INET/INET6 Protocol Control Block). When a socket is | |
40 | * connected, the flow hash for the socket is computed and stored in | |
41 | * the PCB. Further transmissions on the socket will cause the hash | |
42 | * value to be carried within the mbuf as the flow ID. | |
43 | * | |
44 | * b. Interface. When an interface is attached, the flow hash for the | |
45 | * interface is computed and stored in the ifnet. This value is | |
46 | * normally ignored for most network drivers, except for those that | |
47 | * reside atop another driver, e.g. a virtual interface performing | |
48 | * encapsulation/encryption on the original packet and sending the | |
49 | * newly-generated packet to another interface. Such an interface needs | |
50 | * to associate all generated packets with the interface flow hash | |
51 | * value as the flow ID. | |
52 | * | |
53 | * c. PF (Packet Filter). When a packet goes through PF and it is not | |
54 | * already associated with a flow ID, PF will compute a flow hash and | |
55 | * store it in the packet as flow ID. When the packet is associated | |
56 | * with a PF state, the state record will have the flow ID stored | |
57 | * within, in order to avoid recalculating the flow hash. Although PF | |
58 | * is capable of generating flow IDs, it does not participate in flow | |
59 | * advisory, and therefore packets whose IDs are computed by PF will | |
60 | * not have their PKTF_FLOW_ADV packet flag set. | |
61 | * | |
62 | * Activation of flow advisory mechanism is done by setting the PKTF_FLOW_ADV | |
63 | * packet flag; because a flow ID is required, the mechanism will not take | |
64 | * place unless PKTF_FLOW_ID is set as well. The packet must also carry one | |
65 | * of the flow source types FLOWSRC_{INPCB,IFNET} in order to identify where | |
66 | * the flow advisory notification should be delivered to. As noted above, | |
67 | * FLOWSRC_PF does not participate in this mechanism. | |
68 | * | |
69 | * The classq module configured on the interface is responsible for exerting | |
70 | * flow control to the upper layers. This occurs when the number of packets | |
71 | * queued for a flow reaches a limit. The module generating the flow will | |
72 | * cease transmission until further flow advisory notice, and the flow will | |
73 | * be inserted into the classq's flow control list. | |
74 | * | |
75 | * When packets are dequeued from the classq and the number of packets for | |
76 | * a flow goes below a limit, the classq will transfer its flow control list | |
77 | * to the global fadv_list. This will then trigger the flow advisory thread | |
78 | * to run, which will cause the flow source modules to be notified that data | |
79 | * can now be generated for those previously flow-controlled flows. | |
80 | */ | |
81 | ||
82 | #include <sys/param.h> | |
83 | #include <sys/systm.h> | |
84 | #include <sys/kernel.h> | |
85 | #include <sys/mcache.h> | |
86 | #include <sys/mbuf.h> | |
87 | #include <sys/proc_internal.h> | |
88 | #include <sys/socketvar.h> | |
89 | ||
90 | #include <kern/assert.h> | |
91 | #include <kern/thread.h> | |
92 | #include <kern/locks.h> | |
93 | #include <kern/zalloc.h> | |
94 | ||
95 | #include <netinet/in_pcb.h> | |
96 | #include <net/flowadv.h> | |
97 | ||
98 | /* Lock group and attribute for fadv_lock */ | |
0a7de745 A |
99 | static lck_grp_t *fadv_lock_grp; |
100 | static lck_grp_attr_t *fadv_lock_grp_attr; | |
39236c6e A |
101 | decl_lck_mtx_data(static, fadv_lock); |
102 | ||
103 | /* protected by fadv_lock */ | |
104 | static STAILQ_HEAD(fadv_head, flowadv_fcentry) fadv_list; | |
105 | static thread_t fadv_thread = THREAD_NULL; | |
106 | static uint32_t fadv_active; | |
107 | ||
f427ee49 A |
108 | static unsigned int fadv_size; /* size of flowadv_fcentry */ |
109 | static struct mcache *fadv_cache; /* mcache for flowadv_fcentry */ | |
39236c6e | 110 | |
f427ee49 | 111 | #define FADV_CACHE_NAME "flowadv" /* cache name */ |
39236c6e A |
112 | |
113 | static int flowadv_thread_cont(int); | |
114 | static void flowadv_thread_func(void *, wait_result_t); | |
115 | ||
116 | void | |
117 | flowadv_init(void) | |
118 | { | |
119 | STAILQ_INIT(&fadv_list); | |
120 | ||
121 | /* Setup lock group and attribute for fadv_lock */ | |
122 | fadv_lock_grp_attr = lck_grp_attr_alloc_init(); | |
123 | fadv_lock_grp = lck_grp_alloc_init("fadv_lock", fadv_lock_grp_attr); | |
124 | lck_mtx_init(&fadv_lock, fadv_lock_grp, NULL); | |
125 | ||
f427ee49 A |
126 | fadv_size = sizeof(struct flowadv_fcentry); |
127 | fadv_cache = mcache_create(FADV_CACHE_NAME, fadv_size, | |
128 | sizeof(uint64_t), 0, MCR_SLEEP); | |
39236c6e A |
129 | |
130 | if (kernel_thread_start(flowadv_thread_func, NULL, &fadv_thread) != | |
131 | KERN_SUCCESS) { | |
132 | panic("%s: couldn't create flow event advisory thread", | |
133 | __func__); | |
134 | /* NOTREACHED */ | |
135 | } | |
136 | thread_deallocate(fadv_thread); | |
137 | } | |
138 | ||
139 | struct flowadv_fcentry * | |
140 | flowadv_alloc_entry(int how) | |
141 | { | |
142 | struct flowadv_fcentry *fce; | |
143 | ||
f427ee49 A |
144 | if ((fce = mcache_alloc(fadv_cache, (how == M_WAITOK) ? |
145 | MCR_SLEEP : MCR_NOSLEEP)) != NULL) { | |
146 | bzero(fce, fadv_size); | |
0a7de745 | 147 | } |
39236c6e | 148 | |
0a7de745 | 149 | return fce; |
39236c6e A |
150 | } |
151 | ||
152 | void | |
153 | flowadv_free_entry(struct flowadv_fcentry *fce) | |
154 | { | |
f427ee49 | 155 | mcache_free(fadv_cache, fce); |
39236c6e A |
156 | } |
157 | ||
158 | void | |
159 | flowadv_add(struct flowadv_fclist *fcl) | |
160 | { | |
0a7de745 | 161 | if (STAILQ_EMPTY(fcl)) { |
39236c6e | 162 | return; |
0a7de745 | 163 | } |
39236c6e A |
164 | |
165 | lck_mtx_lock_spin(&fadv_lock); | |
166 | ||
167 | STAILQ_CONCAT(&fadv_list, fcl); | |
168 | VERIFY(!STAILQ_EMPTY(&fadv_list)); | |
169 | ||
0a7de745 | 170 | if (!fadv_active && fadv_thread != THREAD_NULL) { |
39236c6e | 171 | wakeup_one((caddr_t)&fadv_list); |
0a7de745 | 172 | } |
39236c6e A |
173 | |
174 | lck_mtx_unlock(&fadv_lock); | |
175 | } | |
176 | ||
39037602 | 177 | void |
0a7de745 A |
178 | flowadv_add_entry(struct flowadv_fcentry *fce) |
179 | { | |
39037602 A |
180 | lck_mtx_lock_spin(&fadv_lock); |
181 | STAILQ_INSERT_HEAD(&fadv_list, fce, fce_link); | |
182 | VERIFY(!STAILQ_EMPTY(&fadv_list)); | |
183 | ||
0a7de745 | 184 | if (!fadv_active && fadv_thread != THREAD_NULL) { |
39037602 | 185 | wakeup_one((caddr_t)&fadv_list); |
0a7de745 | 186 | } |
39037602 A |
187 | |
188 | lck_mtx_unlock(&fadv_lock); | |
189 | } | |
190 | ||
39236c6e A |
191 | static int |
192 | flowadv_thread_cont(int err) | |
193 | { | |
194 | #pragma unused(err) | |
195 | for (;;) { | |
5ba3f43e | 196 | LCK_MTX_ASSERT(&fadv_lock, LCK_MTX_ASSERT_OWNED); |
39236c6e A |
197 | while (STAILQ_EMPTY(&fadv_list)) { |
198 | VERIFY(!fadv_active); | |
199 | (void) msleep0(&fadv_list, &fadv_lock, (PSOCK | PSPIN), | |
200 | "flowadv_cont", 0, flowadv_thread_cont); | |
201 | /* NOTREACHED */ | |
202 | } | |
203 | ||
204 | fadv_active = 1; | |
205 | for (;;) { | |
206 | struct flowadv_fcentry *fce; | |
207 | ||
208 | VERIFY(!STAILQ_EMPTY(&fadv_list)); | |
209 | fce = STAILQ_FIRST(&fadv_list); | |
210 | STAILQ_REMOVE(&fadv_list, fce, | |
211 | flowadv_fcentry, fce_link); | |
212 | STAILQ_NEXT(fce, fce_link) = NULL; | |
213 | ||
214 | lck_mtx_unlock(&fadv_lock); | |
5ba3f43e | 215 | switch (fce->fce_flowsrc_type) { |
39236c6e A |
216 | case FLOWSRC_INPCB: |
217 | inp_flowadv(fce->fce_flowid); | |
218 | break; | |
219 | ||
220 | case FLOWSRC_IFNET: | |
221 | ifnet_flowadv(fce->fce_flowid); | |
222 | break; | |
223 | ||
5ba3f43e | 224 | |
39236c6e A |
225 | case FLOWSRC_PF: |
226 | default: | |
227 | break; | |
228 | } | |
229 | flowadv_free_entry(fce); | |
230 | lck_mtx_lock_spin(&fadv_lock); | |
231 | ||
232 | /* if there's no pending request, we're done */ | |
0a7de745 | 233 | if (STAILQ_EMPTY(&fadv_list)) { |
39236c6e | 234 | break; |
0a7de745 | 235 | } |
39236c6e A |
236 | } |
237 | fadv_active = 0; | |
238 | } | |
239 | } | |
240 | ||
cb323159 | 241 | __dead2 |
39236c6e A |
242 | static void |
243 | flowadv_thread_func(void *v, wait_result_t w) | |
244 | { | |
245 | #pragma unused(v, w) | |
246 | lck_mtx_lock(&fadv_lock); | |
247 | (void) msleep0(&fadv_list, &fadv_lock, (PSOCK | PSPIN), | |
248 | "flowadv", 0, flowadv_thread_cont); | |
249 | /* | |
250 | * msleep0() shouldn't have returned as PCATCH was not set; | |
251 | * therefore assert in this case. | |
252 | */ | |
253 | lck_mtx_unlock(&fadv_lock); | |
254 | VERIFY(0); | |
255 | } | |
f427ee49 A |
256 | |
257 | void | |
258 | flowadv_reap_caches(boolean_t purge) | |
259 | { | |
260 | mcache_reap_now(fadv_cache, purge); | |
261 | } |