]>
Commit | Line | Data |
---|---|---|
1c79356b A |
1 | /* |
2 | * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. | |
3 | * | |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* | |
29 | * @OSF_COPYRIGHT@ | |
30 | * | |
31 | */ | |
32 | /* | |
33 | * HISTORY | |
34 | * | |
35 | * Revision 1.1.1.1 1998/09/22 21:05:29 wsanchez | |
36 | * Import of Mac OS X kernel (~semeria) | |
37 | * | |
38 | * Revision 1.1.1.1 1998/03/07 02:25:45 wsanchez | |
39 | * Import of OSF Mach kernel (~mburg) | |
40 | * | |
41 | * Revision 1.1.11.1 1996/09/17 16:34:42 bruel | |
42 | * fixed types. | |
43 | * [96/09/17 bruel] | |
44 | * | |
45 | * Revision 1.1.6.1 1995/06/13 18:20:10 sjs | |
46 | * Merged from flipc_shared. | |
47 | * [95/06/07 sjs] | |
48 | * | |
49 | * Revision 1.1.3.14 1995/05/19 00:58:14 sjs | |
50 | * Added send_ready to shared area, used for fast check if there is something | |
51 | * to do (and prevents the cache from getting stirred). | |
52 | * [95/05/18 sjs] | |
53 | * | |
54 | * Revision 1.1.3.13 1995/05/16 20:46:28 randys | |
55 | * Export performance valid information through performance | |
56 | * structure rather than kernel configuration section. | |
57 | * [95/05/16 randys] | |
58 | * | |
59 | * Added performance (FLIPC_PERF) config information to | |
60 | * kernel_configuration section of comm buffer, so that user | |
61 | * programs can find out if this information is being gathered. | |
62 | * [95/05/16 randys] | |
63 | * | |
64 | * Revision 1.1.3.12 1995/05/15 14:26:54 randys | |
65 | * Updated comments on use of acquire pointer (it's completely | |
66 | * ignored if dpb is set) and added macros for testing !dpb and | |
67 | * enabled at the same time. | |
68 | * [95/05/11 randys] | |
69 | * | |
70 | * Change pme_process_ptr ==> sme_process_ptr (since it's being read | |
71 | * by AIL now). | |
72 | * [95/05/11 randys] | |
73 | * | |
74 | * Added private copied of release and process pointers. | |
75 | * [95/05/11 randys] | |
76 | * | |
77 | * Rearrange endpoint structure to separate data with importantly | |
78 | * different access patterns into different cache lines. This | |
79 | * involved duplicating some (effectively constant) entries, and | |
80 | * creating two versions of some macros. | |
81 | * [95/05/11 randys] | |
82 | * | |
83 | * Revision 1.1.3.11 1995/05/08 16:06:33 randys | |
84 | * Added comment explaining that an endpoint bufferlist must always | |
85 | * have valid buffer pointers in all of its entries, to keep | |
86 | * FLIPC_endpoint_buffer_available from going off the deep end. No | |
87 | * code changes. | |
88 | * [95/04/18 randys] | |
89 | * | |
90 | * Revision 1.1.3.10 1995/04/05 21:21:52 randys | |
91 | * Added a field to the buffer control structure holding the | |
92 | * scheduling policy chosen for the allocations lock. | |
93 | * [95/04/05 randys] | |
94 | * | |
95 | * Revision 1.1.3.9 1995/03/23 20:35:19 randys | |
96 | * Added comments indicating duplication of declarations of | |
97 | * flipc_cb_base & flipc_cb_size in this file and in flipc_usermsg.h | |
98 | * Modified declaration of flipc_cb_size to be unsigned long. | |
99 | * [95/03/21 randys] | |
100 | * | |
101 | * Revision 1.1.3.8 1995/02/23 21:32:42 randys | |
102 | * Added space for kernel configuration in communications buffer | |
103 | * control structure. | |
104 | * [95/02/22 randys] | |
105 | * | |
106 | * Revision 1.1.3.7 1995/02/21 17:22:58 randys | |
107 | * Re-indented code to four space indentation | |
108 | * [1995/02/21 16:25:32 randys] | |
109 | * | |
110 | * Revision 1.1.3.6 1995/02/13 22:57:29 randys | |
111 | * Replaced all of NEXT_{ACQUIRE,RELEASE,PROCESS}_PTR macros with a | |
112 | * single NEXT_BUFFERLIST_PTR macro. | |
113 | * [95/02/03 randys] | |
114 | * | |
115 | * Revision 1.1.3.5 1995/01/26 21:01:44 randys | |
116 | * Add performance structure into CB. | |
117 | * [1995/01/24 21:14:31 randys] | |
118 | * | |
119 | * Added flag in epgroup structure to note that epgroup | |
120 | * has a semaphore associated with it. | |
121 | * [1995/01/19 23:02:13 randys] | |
122 | * | |
123 | * Add a space in the comm buffer header for the null_destination | |
124 | * the ME sets up for the AIL. Get rid of | |
125 | * FLIPC_ADDRESS_ENDPOINT_PTR (it isn't used) | |
126 | * [1995/01/19 20:22:30 randys] | |
127 | * | |
128 | * Up the comm buffer size to 1 megabyte | |
129 | * [1995/01/17 22:23:27 randys] | |
130 | * | |
131 | * Revision 1.1.3.4 1995/01/12 21:19:01 randys | |
132 | * Minor commenting changes from dlb | |
133 | * [1995/01/06 18:18:12 randys] | |
134 | * | |
135 | * Revision 1.1.3.3 1994/12/22 16:23:57 randys | |
136 | * Fixed calculation of number of buffers on an endpoint | |
137 | * to take size of buffer pointers into account. | |
138 | * [1994/12/21 16:19:55 randys] | |
139 | * | |
140 | * Revision 1.1.3.2 1994/12/20 19:01:56 randys | |
141 | * Moved definition of flipc_simple_lock to flipc_cb.h | |
142 | * [1994/12/20 17:34:41 randys] | |
143 | * | |
144 | * Added a simple lock in the comm buffer to use for the | |
145 | * allocations lock, along with directions as to how | |
146 | * to use it (not like a normal simple lock). | |
147 | * [1994/12/20 15:27:25 randys] | |
148 | * | |
149 | * Added error log into communications buffer control | |
150 | * structure, and changed FLIPC_ADDRESS_ENDPOINT_PTR to | |
151 | * correctly compute the endpoint pointer based on the | |
152 | * new ctl structure layout. | |
153 | * [1994/12/19 23:47:45 randys] | |
154 | * | |
155 | * Added filename in comment at top of each file | |
156 | * [1994/12/19 20:28:20 randys] | |
157 | * | |
158 | * Add version field to epgroup to check races on buffer acquire | |
159 | * from epgroup. | |
160 | * [1994/12/19 18:05:04 randys] | |
161 | * | |
162 | * Revision 1.1.3.1 1994/12/12 17:46:12 randys | |
163 | * Putting initial flipc implementation under flipc_shared | |
164 | * [1994/12/12 16:27:46 randys] | |
165 | * | |
166 | * Revision 1.1.1.2 1994/12/11 23:11:18 randys | |
167 | * Initial flipc code checkin | |
168 | * | |
169 | * $EndLog$ | |
170 | */ | |
171 | ||
172 | /* | |
173 | * mach/flipc_cb.h | |
174 | * | |
175 | * This file is intended to be the data structure layout for the flipc | |
176 | * communications buffer, both for the KKT implementation and |
177 | * for the eventual paragon implementation. This file should include | |
178 | * all of the information necessary for either humans or machines to | |
179 | * understand the data structure layout. | |
180 | * | |
181 | * The communications buffer is the wired section of memory used for | |
182 | * communication between the flipc applications interface layer and | |
183 | * the flipc message engine. No structure in it are visible to the | |
184 | * user; the applications interface layer mediates all user access to | |
185 | * the CB. | |
186 | */ | |
187 | ||
188 | #ifndef _MACH_FLIPC_CB_H_ | |
189 | #define _MACH_FLIPC_CB_H_ | |
190 | ||
191 | #include <mach/flipc_types.h> | |
192 | ||
193 | /* | |
194 | * Flipc naming and argument ordering conventions (this applies mainly to | |
195 | * user-interface.h, but seems inappropriate in a user-visible header file): | |
196 | * | |
197 | * All objects prefixed with "flipc"; uppercase for user-visible | |
198 | * objects, lower case for internal ones. | |
199 | * | |
200 | * Types created with typedef will have _t suffixes. | |
201 | * | |
202 | * Words will be separated by '_'. | |
203 | * | |
204 | * Macro definitions will be all in caps. | |
205 | * | |
206 | * Enum members will have their initial letter (after Flipc) capitalized. | |
207 | * | |
208 | * | |
209 | * For user-visible routines: | |
210 | * | |
211 | * The first word following the "flipc" will be the flipc object type that | |
212 | * that routine operates on (specifically "domain", "epgroup", | |
213 | * "endpoint", or "buffer"). | |
214 | * | |
215 | * The object named by the first word of the call will, if an argument | |
216 | * to the call, be the first argument. | |
217 | * | |
218 | * Output variables passed as pointers in the arglist will come last. | |
219 | */ | |
220 | ||
221 | /* | |
222 | * The kinds of objects that exist in the communications buffer are: | |
223 | * | |
224 | * Endpoints -- Used for sending or receiving. | |
225 | * Buffers -- Composed of a buffer header and buffer data. | |
226 | * Endpoint groups -- Used for collecting multiple numbers of endpoints | |
227 | * together for a select like operation. | |
228 | */ | |
229 | ||
230 | /* | |
231 | * We can't use general pointers inside the communications buffer, | |
232 | * since the address space on either side of the interface is | |
233 | * different. The places where we could use pointers are: | |
234 | * | |
235 | * *) From endpoint sets to endpoints. | |
236 | * *) From endpoints to buffers. | |
237 | * | |
238 | * The kinds of pointers we could use are: | |
239 | * *) Byte offset from the beginning of the comm buffer. This | |
240 | * is simple, but has the disadvantage of allowing the user to | |
241 | * play games with pointing endpoint buffer pointers into data | |
242 | * space, & etc. | |
243 | * *) Rigid arrays of each type of object, with the object | |
244 | * "pointer" being an index into the array. This avoids the | |
245 | * above problem, but complicates memory allocation (forces | |
246 | * allocation to be contiguous, which may force pre-deciding | |
247 | * how much space each of the above types will take). | |
248 | * | |
249 | * Though we appear to be going for the rigid allocation for each type | |
250 | * of data structure, I'm still going to do the "simple offset" | |
251 | * solution to maintain maximum flexibility into the future. | |
252 | * The single exception to this is that FLIPC addresses will be composed of | |
253 | * node number and endpoint number, where the endpoint number will be | |
254 | * the index into the endpoint array. | |
255 | */ | |
256 | ||
/*
 * Offset-based "pointer" into the communications buffer.  General
 * pointers cannot be used because the AIL and the ME see the buffer
 * at different addresses; all intra-CB references are byte offsets
 * from the beginning of the comm buffer.
 */
typedef unsigned long flipc_cb_ptr;
/* Define a null value, which doesn't point anywhere into the CB. */
#define FLIPC_CBPTR_NULL ((flipc_cb_ptr) -1)
260 | ||
261 | /* | |
262 | * Synchronization between message engine and application. | |
263 | * | |
264 | * In general, it isn't reasonable to allow locking and unlocking of | |
265 | * data structures between message engine and communications buffer, | |
266 | * as this requires the message engine to trust arbitrary user | |
267 | * threads. The solution is to arrange all data structures so that | |
268 | * they may be accessed by both parties without locking. The way that | |
269 | * this is usually done is that specific variables are considered to | |
270 | * be owned by one of the ME or the AIL, and the other party is | |
271 | * allowed to read the variable but not to modify it. With this | |
272 | * arrangement, implementing things like producer/consumer circular | |
273 | * queues is possible; each agent (ME or AIL) goes around the list | |
274 | * doing its thing, and avoids passing the pointer showing where the | |
275 | * other agent is working. | |
276 | * | |
277 | * Following the above, we may divide structure members into five | |
278 | * classes, and define prefixes for these five classes. | |
279 | * | |
280 | * Description Prefix | |
281 | * ------------------------------- | |
282 | * Private to AIL pail_ | |
283 | * Private to ME pme_ | |
284 | * AIL owned, read by ME sail_ | |
285 | * ME owned, read by AIL sme_ | |
286 | * Shared in other way shrd_ | |
287 | * | |
288 | * Shared variables may change their ownership based on their own | |
289 | * or someone elses value (these variables may be thought of as | |
290 | * being handed back and forth between the two entities) or on a | |
291 | * configuration option of the structure (not handed back and forth, | |
292 | * but still based on another variables value). | |
293 | * | |
294 | * In addition, I am going to put variables that are set at endpoint | |
295 | * allocation and cleared at deallocation (but read by both sides) in | |
296 | * a separate class; they are "AIL owned, read by ME" but are | |
297 | * effectively constant over the synchronization protocols we care | |
298 | * about. | |
299 | * | |
300 | * Constant after allocation const_ | |
301 | * | |
302 | * Note that this ignores memory consistency issues (when the two | |
303 | * agents are actually on two separate processors). These issues need | |
304 | * to be explored in more detail; for now suffice it to say that the | |
305 | * above methods work given a sequentially consistent memory model or | |
306 | * a processor consistent memory model. | |
307 | * | |
308 | * Also note that an optimizing compiler may reorder our memory | |
309 | * accesses, playing merry hell with the inter-node synchronization | |
310 | * protocols (the compiler doesn't know about the other node, after | |
311 | * all). To avoid this, all structure members used for | |
312 | * synchronization will be marked volatile; this will force the | |
313 | * compiler to keep the order and number of accesses intact. This | |
314 | * will also force the compiler *not* to optimize way accesses to | |
315 | * these variables, so it is wise to explicitly load the variable into | |
316 | * a temporary once if you need to do multiple computations with it, | |
317 | * and store it back afterwards when you are done. | |
318 | */ | |
319 | ||
320 | /* | |
321 | * Memory allocation: | |
322 | * | |
323 | * For maximum simplicity in the first implementation, we need to know | |
324 | * at comm buffer allocation time how many endpoints, endpoint_sets, | |
325 | * and buffers we will want total, until the end of time. This | |
326 | * massively simplifies memory allocation; there will be a single array | |
327 | * of each type of data and the communication buffer will be taken up | |
328 | * by the concatenation of these arrays (with some fiddling to make | |
329 | * sure that no data crosses a page boundary). | |
330 | * | |
331 | * For each data type there will be a free list to which pieces of | |
332 | * data will be added to or removed from as needed. Each data type | |
333 | * will have a pointer in it to allow it to be linked onto the free | |
334 | * list. | |
335 | */ | |
336 | ||
337 | /* | |
338 | * Multiple thread access to data structures: | |
339 | * | |
340 | * There are several points in the communications buffer (notably | |
341 | * endpoint accesses) when multiple application threads will be | |
342 | * attempting operations on data structures at the same time. To | |
343 | * multiplex these operations, we need a per-data structure lock. | |
344 | * Lock attributes: | |
345 | * *) This lock will not be kernel based, as such a lock would be | |
346 | * too heavyweight to use for arbitrary sending and receiving | |
347 | * operations). | |
348 | * *) Because it is not kernel based, it may not be used to | |
349 | * multiplex accesses from threads at different kernel | |
350 | * priority levels. Deadlock would result if a low-priority | |
351 | * thread gained the lock and then was prempted by a | |
352 | * high-priority thread that wanted to acquire it. | |
353 | * *) Architecture-dependent interfaces need to be designed to | |
354 | * atomically lock and unlock this data structure. | |
355 | * | |
356 | * These are "simple locks" and are defined in flipc_dep.h. | |
357 | */ | |
358 | ||
359 | /* | |
360 | * Lock type. This placement (in flipc_cb.h) is a little bit of a | |
361 | * hack, as it really should be defined with the machine dependent lock | |
362 | * macros. But then the machine independent lock macros have problems | |
363 | * because they have to include it both before and after the prototypes. | |
364 | * So rather than split the machine dependent stuff into multiple | |
365 | * files, I'll define it here and hope that this definition works for | |
366 | * whatever architectures we're on. | |
367 | */ | |
/* Simple (user-level, non-kernel) lock word; see the lock discussion
   above and the machine-dependent lock macros in flipc_dep.h. */
typedef unsigned long flipc_simple_lock;
369 | ||
370 | /* | |
371 | * Ownership of data structures. | |
372 | * | |
373 | * Please note that this is a can of worms, and that I (Randys) | |
374 | * consider this (and it's interactions with endpoint group membership) | |
375 | * the likeliest place for design bugs in FLIPC. Any and all should | |
376 | * take this as an open invitation and challenge to find bugs in what | |
377 | * follows. | |
378 | * | |
379 | * Rules: | |
380 | * | |
381 | * *) If you've disabled a structure and synched with the | |
382 | * appropriate side of the ME, the ME won't touch it. | |
383 | * | |
384 | * *) If you've taken a send endpoint off of the send endpoint | |
385 | * list and sync'd with the ME, the ME won't touch it. | |
386 | * | |
387 | *[The rest of this applies to the AIL only; the above rules are the | |
388 | * only ones the ME respects. ] | |
389 | * | |
390 | * *) Within the AIL, a disabled structure is owned by: | |
391 | * *) The routine that disabled it, before it is put on | |
392 | * the free list. | |
393 | * *) The routine that dequeued it from the free list, | |
394 | * before it is enabled. | |
395 | * Taking of the simple lock is not required for ownership in | |
396 | * these cases. Taking of the simple lock is not required for | |
397 | * the act of *enabling* the structure (you have ownership and | |
398 | * are giving it away), however it is required for the act of | |
399 | * disabling the structure (since it is the only valid way to | |
400 | * take ownership of an enabled structure, and you can't | |
401 | * modify the enabled bit without having ownership). | |
402 | * | |
403 | * *) The simple lock in a structure always needs to be valid, as | |
404 | * simple locks may be taken while the structure is in any | |
405 | * state. Similarly, the enabled bit must always be valid, | |
406 | * both because it's what the ME checks, and because it may be | |
407 | * checked by the AIL while the structure is free. | |
408 | * | |
409 | * *) Holding the simple lock on an enabled structure imparts | |
410 | * ownership of that structure. You are allowed to take the | |
411 | * simple lock of a disabled structure, but ownership is not | |
412 | * gained by doing so. | |
413 | * | |
414 | * *) You are allowed to read the enabled/disabled bit without | |
415 | * owning the structure (if the structure is disabled, there | |
416 | * may be no way to gain the ownership). | |
417 | * | |
418 | * *) Owning a structure allows you to do what you want with it, | |
419 | * except: | |
420 | * *) As mentioned above, the simple lock and | |
421 | * enabled/disabled bit must always be valid. | |
422 | * *) The ownership of the endpoint group related members | |
423 | * of an endpoint structure is special; see below. | |
424 | * *) The allocations lock must be held to manipulate the | |
425 | * next send endpoint field of any endpoint. | |
426 | * | |
427 | * *) If an endpoint is on an endpoint group, the ownership of | |
428 | * the the endpoint group related members of the structure | |
429 | * (sail_endpoint_group and pail_next_eg_endpoint) go with the | |
430 | * owndership of the endpoint group, not the endpoint. For | |
431 | * this purpose only, membership is defined atomically as the | |
432 | * sail_endpoint_group pointer being set to an endpoint group. | |
433 | * Thus one may remove an endpoint from an endpoint group | |
434 | * without owning the endpoint (change the sail_endpoint_group | |
435 | * pointer last). One requires both locks to add an endpoint | |
436 | * to an endpoint group, however. | |
437 | * | |
438 | * (Part of the motivation for this is that removal and | |
439 | * addition of endpoints to endpoint groups requires | |
440 | * modifications of pointers in other endpoint structures). | |
441 | * | |
442 | * *) No structure may be put on the free list if marked with any | |
443 | * association to any other structure. Specifically, endpoint | |
444 | * groups may have no endpoints belonging to them, and | |
445 | * endpoints may not belong to an endpoint group or have | |
446 | * buffers belonging to them. | |
447 | * | |
448 | * *) One consequence of the above is that endpoint groups may | |
449 | * not be marked as disabled while they have any endpoints on | |
450 | * them, as freeing an endpoint requires it to be removed from | |
451 | * its endpoint group, and if ownership of the endpoint group | |
452 | * cannot be gained, that is impossible. | |
453 | * | |
454 | * *) In theory, endpoints *may* be marked disabled while they | |
455 | * are still on endpoint groups. In practice, they are not. | |
456 | * This is relied on by the code which frees endpoint groups, | |
457 | * in a non-obvious way. Specifically, that code assumes that | |
458 | * there is no way that a call to free endpoint will return | |
459 | * with the endpoint still on the endpoint group. Since the | |
460 | * only way for free endpoint to fail is if the endpoint is | |
461 | * inactive, and since the endpoint is set inactive only after | |
462 | * free endpoint (presumably a different one) confirms that it | |
463 | * isn't on any endpoint group, this assumption is true. | |
464 | * | |
465 | * Got that? Take home lesson: don't allow endpoints to be | |
466 | * marked disabled while still on endpoint groups until you | |
467 | * *do* get that, and are willing to take the responsibility | |
468 | * of changing it so that it works under your new scheme. | |
469 | * | |
470 | * *) Ownership of the freelist(s) are gained by holding the | |
471 | * allocations lock for the buffer, and *only* in that way. | |
472 | * No modification of freelist, send endpoint list, or send | |
473 | * side ME sync bits is valid without holding the allocations | |
474 | * lock. In other words, while you can read things in the | |
475 | * main communications buffer control structure at will, you | |
476 | * may not change them without owning the allocations lock. | |
477 | * | |
478 | * *) The state where a structure is disabled but off of the | |
479 | * freelist may be valid as an intermediate (while an AIL | |
480 | * routine is orchestrating a transition) but is not a valid | |
481 | * static state. This state must not survive the return to | |
482 | * application code of the thread that disabled the structure. | |
483 | */ | |
484 | ||
485 | /* | |
486 | * Flipc data buffer management. | |
487 | * | |
488 | * A buffer (whether being used for sending or receiving) may be in | |
489 | * one of three states: | |
490 | * | |
491 | * READY -- Buffer held by application. | |
492 | * PROCESSING -- Buffer held by endpoint, unprocessed. For receive endpoints, | |
493 | * this means that the buffer is empty, waiting to be filled by | |
494 | * an incoming message. For send endpoints, this means that the | |
495 | * buffer is full, waiting to be sent out. | |
496 | * COMPLETED -- Buffer held by the endpoint, processed. For receive | |
497 | * endpoints, this means that the buffer is full, with newly | |
498 | * received data in it. For send endpoints, this means that the | |
499 | * buffer is empty (*), with it's data having been sent out. | |
500 | * | |
501 | * (*) In point of fact the data hasn't been touched, though bits | |
502 | * may have been fiddled with in the header data structure. But | |
503 | * it's been sent. | |
504 | * FREE -- The buffer is in the pool of free buffers, and may be | |
505 | * allocated to any newly created endpoint. | |
506 | * | |
507 | * The transition diagram between these states is relatively simple: | |
508 | * | |
509 | * | |
510 | * release | |
511 | * /-----------------\| | |
512 | * +----------+ -+----------+ | |
513 | * | READY | |PROCESSING|<- - - - - - | |
514 | * +----------+_ +----------+ \ | |
515 | * ^ |\ - - - - - - - - / | | \endpoint allocate | |
516 | * | (processed) \endpoint \ | |
517 | * | | \ free | | |
518 | * | acquire / ------\ | |
519 | * | \ | | |
520 | * | / (processed) >+----------+ | |
521 | * +----------+ | FREE | | |
522 | * |COMPLETED |< - - - - - - - - - - +----------+ | |
523 | * +----------+ endpoint allocate / ^ | |
524 | * | ^- - - - - - - - - - - - - - - - - - - - - - - | | |
525 | * | / | |
526 | * \ endpoint free / | |
527 | * ------------------------------------------------------/ | |
528 | * | |
529 | * (If it doesn't look simple, imagine it without the FREE state; that | |
530 | * state doesn't enter into almost any buffer manipulations) | |
531 | * | |
532 | * For send buffers, release==send, acquire==allocate, and | |
533 | * processed==the sending done by the message engine. For receive buffers, | |
534 | * release==release, acquire==receive, and process==the actual | |
535 | * arrival of the message handled by the messaging engine. | |
536 | * | |
537 | * The choice of path from the PROCESSING state is an endpoint | |
538 | * specific configuration option; a particular endpoint may leave a | |
539 | * processed buffer on the endpoint, or it may release it back to the | |
540 | * application by dropping it from the endpoint. | |
541 | * | |
542 | * Buffers are assigned the PROCESSING state on a newly allocated | |
543 | * receive endpoint (to be ready to receive messages) and the | |
544 | * COMPLETED state on a newly allocated send endpoint. | |
545 | * | |
546 | * The state (other than FREE) that a particular buffer is in is | |
547 | * determined by its place on a circular queue of buffer pointers that | |
548 | * is part of the endpoint structure. Buffers owned by the | |
549 | * application (READY) are not pointed to by pointers on this queue. | |
550 | * The buffer is released to the message engine by placement of a | |
551 | * pointer to it on this queue. When the message engine is done | |
552 | * processing the buffer, it sets a flag in the buffer header. If the | |
553 | * endpoint is so configured, it then removes the buffer pointer from | |
554 | * the queue; otherwise the AIL acquires the buffer (and removes the | |
555 | * pointer from the queue) when it chooses. | |
556 | * | |
557 | * . . . . . . | |
558 | * . . | |
559 | * . . | |
560 | * . . AIL releasing | |
561 | * . . ^ | |
562 | * . +-------+--/ | |
563 | * . | | | |
564 | * . |Buffers| | |
565 | * . | to be | | |
566 | * . |Sent or| | |
567 | * . |Receivd| | |
568 | * . | Into | ^ ME processing | |
569 | * . +-------+ --/ | |
570 | * . | | | |
571 | * . AIL | Sent | (These buffers have a flag set to indicate | |
572 | * .Acquiring| or | that they have been processed. This | |
573 | * . |Filled | section is optional; the endpoint may be | |
574 | * . |buffers| configured to drop buffers after processing) | |
575 | * . ^ | | | |
576 | * . \--+-------+ | |
577 | * . . | |
578 | * . . | |
579 | * . . . . . . | |
580 | * | |
581 | * | |
582 | * The AIL will refuse to acquire a buffer that has not yet been | |
583 | * processed by the ME. Acquire will not work at all on endpoints | |
584 | * that have been configured to drop buffers on completion. | |
585 | * | |
586 | * The buffer_available primitive is coded to avoid doing a | |
587 | * (potentially costly) acquiring of the endpoint flipc lock. Since | |
588 | * telling where there is a buffer available requires two operations | |
589 | * (comparison of the acquire and release pointers to see if there are | |
590 | * any buffers on the endpoint, and then indirection of the acquire | |
591 | * pointer to see if that buffer has been processed yet), there is a | |
592 | * potential race that will admit the possibility of indirecting | |
593 | * through an invalid pointer. For this reason, for the life of an | |
594 | * endpoint, it is a requirement that all buffer pointers on the | |
595 | * bufferlist point *somewhere* (ie. to some existing buffer), so that | |
596 | * this indirection will not cause an access error. The | |
597 | * buffer_available primitive may return the wrong result, but (as | |
598 | * long as the incorrectness is transitory), this is acceptable. | |
599 | */ | |
600 | ||
/*
 * Buffer states.  The numeric values are chosen so that
 * FLIPC_BUFFER_PROCESSED_P ("Completed or Ready") can be evaluated
 * with a single bitwise AND rather than two comparisons.
 */
typedef enum {
    flipc_Free = 0x0, flipc_Processing = 0x1,
    flipc_Completed = 0x2, flipc_Ready = 0x3
} flipc_buffer_state_t;
/* True for the two "processed" states (flipc_Completed, flipc_Ready). */
#define FLIPC_BUFFER_PROCESSED_P(state) ((state) & 0x2)
608 | ||
609 | /* | |
610 | * Data header/buffer layout. | |
611 | * | |
612 | * For this implementation, and probably for all time, the header | |
613 | * immediately precedes the data in memory, and the messaging engine | |
614 | * will send both header and data. Our priority is message dispatch | |
615 | * speed rather than raw bandwidth (this is the small message side of | |
616 | * a transfer mechanism), so we don't mind that we are throwing away | |
617 | * some bandwidth by taking up transferred space with header data. | |
618 | * | |
619 | * The data size will be the maximum size allowed by the underlying | |
620 | * transport, minus the header size (available at run time). The user | |
621 | * will be given a pointer to the data buffer, and will use this both | |
622 | * for copying data in and out, and as an argument to the underlying | |
623 | * flipc routines. The flipc routines will access appropriately. | |
624 | * | |
625 | * The header structure follows; the user data type will be offset and | |
626 | * cast appropriately to access this. | |
627 | */ | |
628 | ||
629 | typedef struct flipc_data_buffer { | |
630 | union { | |
631 | FLIPC_address_t destination; /* For sending. */ | |
632 | flipc_cb_ptr free; /* Link for header free list. */ | |
633 | } u; | |
634 | ||
635 | /* ME owned if flipc_Processing, AIL owned otherwise. May not ever | |
636 | assume the state flipc_Ready in an optimized implementation. */ | |
637 | volatile flipc_buffer_state_t shrd_state; | |
638 | } *flipc_data_buffer_t; | |
639 | ||
640 | /* | |
641 | * Endpoint structure. | |
642 | * | |
643 | * An endpoint is the data structure used for communicating buffers, | |
644 | * either send or receive. Note that all actual circular lists of | |
645 | * buffer pointers on the endpoints are in their own array that gets | |
646 | * partitioned out to the various endpoints. This is because we want | |
647 | * the endpoint structures themselves to be fixed size for easy | |
648 | * indexing upon receipt of a message. This large scale array will be | |
649 | * of size (max_buffers_per_endpoint) * (number_of_endpoints). Both | |
650 | * of these values are set during the domain initialization call. | |
651 | * | |
652 | * Note that the pointers contained in the buffer lists are pointers to | |
653 | * buffer *headers*, not to the data. | |
654 | */ | |
655 | ||
656 | /* | |
657 | * This structure is divided into four cache lines, separated by their | |
658 | * usage type: | |
659 | * | |
660 | * *) Private data that the AIL scribbles on. | |
661 | * *) Data the AIL writes (regularly) that the ME reads | |
 * (occasionally).  The canonical example is the release pointer.
663 | * *) Private data that the ME scribbles on. | |
 * *) Data the ME writes (regularly) that the AIL reads (occasionally).
665 | * The canonical example is the process pointer. | |
666 | * | |
667 | * There are a couple of other categories of stuff, that can be shoehorned | |
668 | * into the above: | |
669 | * *) Constant data that both sides read regularly. This can be | |
670 | * duplicated in the two private areas (actually, it can be | |
671 | * duplicated in any two areas that stay in the cache of the | |
672 | * respective processors). | |
673 | * *) Stuff that is not accessed on the critical path; it can go | |
674 | * almost anywhere (probably in one of the two ping-ponging | |
675 | * cache lines). | |
676 | * *) Stuff that is read-only for a single processor goes in that | |
677 | * processors private data section. | |
678 | * | |
679 | * Duplicate entries have a "p" or a "a" suffixed to the name to | |
680 | * indicate that fact. Note that these will usually, but not always, | |
681 | * be "const" variables--they may be "const" variables only from the | |
682 | * critical path viewpoint. | |
683 | * | |
684 | * We take cache line length as being 8 * sizeof(int). | |
685 | */ | |
686 | ||
/*
 * An endpoint: one send or receive rendezvous point.  The structure is
 * laid out in four cache lines (AIL-private / AIL-writes-ME-reads /
 * ME-private / ME-writes-AIL-reads); do not reorder fields, as the
 * padding and grouping are the point.  "constda_"/"constdm_" pairs are
 * duplicated constants so each processor reads from its own cache line.
 */
typedef struct flipc_endpoint {

    /* ===Private AIL data=== */
    /* Type of endpoint (send, recv, etc).  Duplicated in the private
       ME section as constdm_type.  */
    FLIPC_endpoint_type_t constda_type;

    /* This next value is two variables squeezed into a single word to
     * save on memory accesses (since they are almost always read at
     * the same time).  The two variables are:
     *
     * const_drop_processed_buffers -- Should the message engine drop
     * buffers after processing them (as opposed to leaving them on
     * the endpoint)?
     *
     * sail_enabled (volatile) -- Is the endpoint enabled?  This isn't
     * marked constant because it is used for synchronization on
     * endpoint deallocation.
     *
     * Note that to reduce tests and branches, these two variables
     * are represented by two bits in the word (bit 0 = enabled and
     * bit 16 = dpb).  It is illegal to have bits other than 0 and 16
     * set in this word.  This invariant is what makes the XOR trick
     * in DISABLED_OR_DPB_P below work, and is enforced in
     * DOE_CONSTRUCT (assumed to not be performance critical).
     *
     * Duplicated in the private ME section as saildm_dpb_or_enabled.
     */

    volatile unsigned long sailda_dpb_or_enabled;

#define EXTRACT_DPB(dpb_or_enabled) ((dpb_or_enabled) >> 16)
#define EXTRACT_ENABLED(dpb_or_enabled) ((dpb_or_enabled) & 0xffff)
/* Nonzero iff the endpoint is disabled OR drop-processed-buffers is
   set.  Relies on the bits-0-and-16-only invariant: the word equals
   exactly 0x1 precisely when enabled and not dpb, so XOR with 0x1
   yields zero only in that case.  */
#define DISABLED_OR_DPB_P(dpb_or_enabled) ((dpb_or_enabled) ^ 0x1)
#define DOE_CONSTRUCT(dpb, enabled) \
    (((dpb) ? 0x10000 : 0) | ((enabled) ? 0x1 : 0))

    flipc_simple_lock pail_lock;        /* Simple lock for serializing
                                           multiple thread access to the
                                           structure.  AIL owned.  */
    /* First element in the buffer list array that is ours.  Constant
       from communications buffer initialization.  */
    flipc_cb_ptr constda_my_buffer_list;
    /* First element after my_buffer_list that is *not* in my buffer
       list.  Constant from communications buffer initialization.  */
    flipc_cb_ptr constda_next_buffer_list;

    /* First location that has a valid buffer pointer in it.  This may
       contain a pointer to a buffer available for acquisition, or it
       may contain a pointer to a buffer that is still being
       processed; the buffer header or process_ptr needs to be checked
       to be sure.  This location is AIL owned.  It is ignored by all
       (including the ME and initialization code) if
       drop_processed_buffers, above, is set.  */
    volatile flipc_cb_ptr shrd_acquire_ptr;

    /* AIL private copy of the process pointer.  This hopefully means
       that the AIL won't need to read the real process pointer (and
       fault in a cache line) very often.  */
    flipc_cb_ptr pail_process_ptr;

    unsigned int pad_pail_7;            /* Pad AIL-private data out to a
                                           full cache line.  */

    /* ===End of cache line===*/
    /* ===AIL writes, ME occasionally reads=== */

    /* Next location at which the AIL may insert a buffer pointer.  */
    volatile flipc_cb_ptr sail_release_ptr;
    unsigned int pad_sail_1;
    unsigned int pad_sail_2;
    unsigned int pad_sail_3;
    unsigned int pad_sail_4;
    unsigned int pad_sail_5;
    unsigned int pad_sail_6;
    unsigned int pad_sail_7;

    /* ===End of cache line===*/
    /* ===Private ME data=== */
    /* Duplicates of constda_type / sailda_dpb_or_enabled; see the
       comments in the private AIL section above.  */

    FLIPC_endpoint_type_t constdm_type;
    volatile unsigned long saildm_dpb_or_enabled;

    volatile unsigned long sme_overruns;  /* For a receive endpoint, counter
                                             for the number of messages that
                                             have arrived when there hasn't
                                             been space.  ME owned.  */
    unsigned long pail_overruns_seen;     /* A count of the number of overruns
                                             that the AIL has noted and doesn't
                                             want to be bothered with again.
                                             The user only sees the difference
                                             between the previous count and
                                             this.  */

    /*
     * For send endpoints; linked into a list that is used by the ME
     * to find stuff to do.  Also used for the endpoint free list.
     * Null if at end of list.  Not "const" because it's used as a
     * synchronization variable during setup and teardown
     * of send endpoints.
     */
    volatile flipc_cb_ptr sail_next_send_endpoint;

    /* Constant buffer list pointers for the ME.  See the private AIL
       comments above.  */
    flipc_cb_ptr constdm_my_buffer_list;
    flipc_cb_ptr constdm_next_buffer_list;

    /* Private ME copy of the release pointer.  This hopefully means
       that the ME won't have to read (and fault in a cache line) the
       release pointer very often.  */
    flipc_cb_ptr pme_release_ptr;
    /* ===End of cache line===*/

    /* ===ME writes, AIL occasionally reads=== */
    /*
     * For endpoint group membership.
     */
    flipc_cb_ptr pail_next_eg_endpoint;   /* Next endpoint in endpoint group.
                                             AIL owned.  */
    flipc_cb_ptr sail_epgroup;            /* Direct pointer to the endpoint
                                             group that we are part of.
                                             FLIPC_CBPTR_NULL if none.  AIL
                                             owned.  */

    /* First location that has a buffer pointer available for
       processing.  If this value is equal to the release_ptr there are
       no buffers available for processing.  */
    volatile flipc_cb_ptr sme_process_ptr;
    unsigned int pad_sme_3;
    unsigned int pad_sme_4;
    unsigned int pad_sme_5;
    unsigned int pad_sme_6;
    unsigned int pad_sme_7;

    /* ===End of cache line===*/
    /* ===END=== */

    /* The following macros may have possible performance loss in
       multiple accesses (or indirection, but a good compiler will get
       around that).  We need versions for each processor so that the
       constant reads are done from the right (local) copy.  */

    /* General bufferlist pointer increment macro (wraps around at the
       end of this endpoint's region), with versions for ME and AIL.  */

#define NEXT_BUFFERLIST_PTR(bufferlist_ptr, endpoint, suf) \
    (((bufferlist_ptr) + sizeof(flipc_data_buffer_t) \
      == ((endpoint)->const ## suf ## _next_buffer_list)) ? \
     ((endpoint)->const ## suf ## _my_buffer_list) : \
     (bufferlist_ptr) + sizeof(flipc_data_buffer_t))
#define NEXT_BUFFERLIST_PTR_ME(bufferlist_ptr, endpoint) \
    NEXT_BUFFERLIST_PTR(bufferlist_ptr, endpoint, dm)
#define NEXT_BUFFERLIST_PTR_AIL(bufferlist_ptr, endpoint) \
    NEXT_BUFFERLIST_PTR(bufferlist_ptr, endpoint, da)

    /* Macros for each of "can I release onto this buffer?", "can I
       acquire from this buffer?" and "can I process an element on
       this buffer?".  The first two presume they are being executed on
       the main processor, the third on the co-processor.
       All have three arguments:
       *) A variable which will be set to the release, acquire, or
          process pointer after the macro *if* the operation is ok.
       *) A temporary variable used inside the macro.
       *) The endpoint.

       We presume the acquire macro won't be called if drop processed
       buffers is enabled; the process and release macros deal
       appropriately with that issue.  */

    /* In general these macros will:
       *) Not read a volatile structure member more than once.
       *) If a variable's owner is the other processor, these macros
          will check a local copy of the variable first before checking
          the other processor's.
       *) Only update the local copy if the remote copy really is
          different from the local one.
     */

    /* This macro implements the synchronization check; local_cbptr is
       the pointer owned by the local processor which we want to compare
       with a pointer on the remote processor which we have a copy
       of locally.  Reads the remote pointer zero or one times; other
       reads are as necessary.

       The algorithm is:
       *) If the local copy says our pointer and the remote value aren't
          equal, we're done (result 1, no remote read needed).
       *) Otherwise, check the remote copy.  If it says the values aren't
          equal, update the local copy (and the result is 1); else 0.  */

#define ENDPOINT_SYNCNE_CHECK(local_cbptr, copy_rmt_cbptr, \
                              rmt_cbptr, tmp_cbptr) \
    ((local_cbptr) != (copy_rmt_cbptr) \
     || ((((tmp_cbptr) = (rmt_cbptr)) != (local_cbptr)) \
         && (((copy_rmt_cbptr) = (tmp_cbptr)), 1)))

#define ENDPOINT_ACQUIRE_OK(acquire_cbptr, tmp_cbptr, endpoint) \
    ((acquire_cbptr) = (endpoint)->shrd_acquire_ptr, \
     ENDPOINT_SYNCNE_CHECK(acquire_cbptr, (endpoint)->pail_process_ptr, \
                           (endpoint)->sme_process_ptr, tmp_cbptr))

#define ENDPOINT_PROCESS_OK(process_cbptr, tmp_cbptr, endpoint) \
    ((process_cbptr) = (endpoint)->sme_process_ptr, \
     ENDPOINT_SYNCNE_CHECK(process_cbptr, (endpoint)->pme_release_ptr, \
                           (endpoint)->sail_release_ptr, tmp_cbptr))

#define NODPB_ENDPOINT_RELEASE_OK(release_cbptr, tmp_cbptr, endpoint) \
    ((release_cbptr) = (endpoint)->sail_release_ptr, \
     (tmp_cbptr) = (endpoint)->shrd_acquire_ptr, \
     (NEXT_BUFFERLIST_PTR_AIL(release_cbptr, endpoint) \
      != (tmp_cbptr)))

    /* Don't use NEXT_BUFFERLIST_PTR here to save a temporary variable;
       the wrap-around test is open-coded instead.  */
#define DPB_ENDPOINT_RELEASE_OK(release_cbptr, tmp_cbptr, endpoint) \
    (release_cbptr = (endpoint)->sail_release_ptr, \
     ((release_cbptr + sizeof(flipc_data_buffer_t) == \
       (endpoint)->constda_next_buffer_list) \
      ? ENDPOINT_SYNCNE_CHECK((endpoint)->constda_my_buffer_list, \
                              (endpoint)->pail_process_ptr, \
                              (endpoint)->sme_process_ptr, \
                              tmp_cbptr) \
      : ENDPOINT_SYNCNE_CHECK(release_cbptr + sizeof(flipc_data_buffer_t), \
                              (endpoint)->pail_process_ptr, \
                              (endpoint)->sme_process_ptr, \
                              tmp_cbptr)))

    /* This next is tricky; remember that acquire_ptr points
       to an actual bufferptr on the list, whereas release_ptr does
       not.  This macro is only used in FLIPC_endpoint_query, and so
       doesn't need to have an ME version.  */

#define BUFFERS_ON_ENDPOINT_AIL(acquire_ptr, release_ptr, endpoint) \
    ((release_ptr) > (acquire_ptr) \
     ? ((release_ptr) - (acquire_ptr)) / sizeof(flipc_cb_ptr) \
     : ((((release_ptr) - (endpoint)->constda_my_buffer_list) \
         + ((endpoint)->constda_next_buffer_list - acquire_ptr)) \
        / sizeof(flipc_cb_ptr)))
} *flipc_endpoint_t;
923 | ||
924 | ||
925 | /* | |
926 | * Endpoint groups. | |
927 | * | |
928 | * Used to represent a group of endpoints, for linking sending/receiving | |
929 | * with semaphores & etc. Note that there needs to be a private data | |
930 | * structure kept by the kernel that associates with each epgroup | |
931 | * a semaphore to be used for wakeups on that endpoint set. | |
932 | */ | |
933 | ||
/*
 * A group of endpoints, used to link sending/receiving with
 * semaphore-based wakeups (see the comment block above).
 */
typedef struct flipc_epgroup {
    flipc_simple_lock pail_lock;        /* Lock to synchronize threads (at the
                                           same priority level) accessing this
                                           structure.  */
    volatile unsigned long sail_enabled;        /* Set if structure is
                                                   active.  */
    unsigned long const_semaphore_associated;   /* Flag to indicate whether or
                                                   not there is a semaphore
                                                   associated with this
                                                   endpoint group in the
                                                   kernel flipc routines.  */
    volatile unsigned long sail_wakeup_req;     /* Incremented when a thread
                                                   wants to be woken.  */
    volatile unsigned long pme_wakeup_del;      /* Incremented when the ME
                                                   delivers a wakeup.  */
    unsigned long pail_version;         /* Incremented when epgroup membership
                                           is changed; checked when retrieving
                                           a buffer from an epgroup.  */
    unsigned long sail_msgs_per_wakeup; /* How many messages need to arrive
                                           before the ME delivers a wakeup.  */
    unsigned long pme_msgs_since_wakeup; /* How many messages have arrived
                                            since the last wakeup.  ME
                                            owned.  */

    flipc_cb_ptr pail_first_endpoint;   /* First endpoint in the group.  The
                                           other endpoints are linked along
                                           behind it.  AIL owned.  */
    flipc_cb_ptr pail_free;             /* Used to link this epgroup onto
                                           the freelist.  */
} *flipc_epgroup_t;
962 | ||
963 | /* | |
964 | * Communication buffer control structure. | |
965 | * | |
966 | * This is in the communications buffer itself. Note that any changes | |
967 | * in this structure require it to be locked with the allocation lock, | |
968 | * as access to this structure is shared by all threads using the CB. | |
969 | */ | |
970 | ||
971 | /* | |
972 | * Individual data type layout. | |
973 | * | |
974 | * All we need here is a pointer to the start of each type of data | |
975 | * struct, the number of those data structures in the communications | |
976 | * buffer, and a pointer to the beginning of the freelist for that data | |
977 | * structure. | |
978 | * | |
979 | * Note that the composite buffer list doesn't have a freelist associated | |
980 | * with it, since each section of the buffer list is tightly bound to an | |
981 | * endpoint, and is allocated and freed with that endpoint. We still | |
982 | * need the start and number information, though. | |
983 | */ | |
/*
 * Per-type allocation control: locates one data-structure array inside
 * the communications buffer and heads its freelist.  (The composite
 * buffer list has no freelist; its "free" field is unused.)
 */
struct flipc_cb_type_ctl {
    flipc_cb_ptr start;         /* Where the array of this type of
                                   data structure starts.  */
    unsigned long number;       /* How many of them we've got.  */
    flipc_cb_ptr free;          /* Where the beginning of the freelist
                                   is.  */
};
991 | ||
992 | /* | |
993 | * Synchronization with message engine. | |
994 | * | |
995 | * At certain times (specifically during structure allocation/free or | |
996 | * additions to the send list) you want to know that the messaging | |
997 | * engine has picked up your changes. However, the message engine has | |
998 | * (effectively) two threads, one for each of the send and receive | |
999 | * sides. The mechanisms used for synchronizations with the two sides | |
1000 | * differ. In an eventual co-processor implementation (with a single | |
1001 | * thread), only the send side mechanism will be used. | |
1002 | * | |
 * To request a cached state flush by the send side of the messaging
1004 | * engine, you flip the request_sync bit and it responds by flipping | |
1005 | * the response_sync bit. The send ME checks this bit once every trip | |
1006 | * through the send endpoints. | |
1007 | * | |
1008 | * On the receive side, since receives take very little time and do | |
1009 | * not block (unlike sends) when we want to make sure the ME is | |
1010 | * holding no cached receive side state, we simply spin until we see | |
1011 | * that the ME receive side is no longer operating. It sets a | |
1012 | * variable whenever it is in the process of receiving a message. | |
1013 | */ | |
1014 | ||
1015 | /* | |
1016 | * Proper manipulation of the send endpoint list. | |
1017 | * | |
1018 | * Note that synchronizing with the message engine over access to the | |
1019 | * send endpoint list is especially tricky. There is no problem with | |
1020 | * writing new values in all of the locations required to take a send | |
1021 | * endpoint off of the list. However, we must be very sure before | |
1022 | * modifying the pointer *in* the send endpoint that the ME isn't | |
1023 | * currently working in that send endpoint (else it could be sent off | |
1024 | * into the void). Two options here: | |
1025 | * | |
1026 | * *) Synchronize (using the below variables) for each send | |
1027 | * endpoint removed, after the removal but before the | |
1028 | * modification of the data in the internal structure. | |
1029 | * *) If we can always be sure that the send endpoint link in the | |
1030 | * endpoint structure has a valid value, we can simply let the | |
1031 | * chips fall where they may. It will be null while free, and | |
1032 | * have a value that points back into the send buffer list | |
 * when reallocated.  I'm not going to do this; it's sleazy
1034 | * and will partially mess up fairness based on ME send | |
1035 | * endpoint round-robinning. | |
1036 | */ | |
1037 | ||
1038 | /* | |
 * This entire structure is protected by a kernel-level lock so there
1040 | * is no conflict between threads accessing it. See flipc_kfr.c for | |
1041 | * details on this lock; how it is implemented and used depends on what | |
1042 | * kernel base we are on. | |
1043 | */ | |
1044 | ||
1045 | /* | |
1046 | * Note that the last element of this structure is variable sized, so this | |
1047 | * structure itself is also variable sized. | |
1048 | */ | |
/*
 * Control structure at the head of the communications buffer.  Shared
 * by all threads using the CB; protected by a kernel-level lock (see
 * the comment above).  The last member is effectively variable sized,
 * so the structure itself is variable sized.
 */
typedef struct flipc_comm_buffer_ctl {
    /* Kernel flipc configuration that the user must match in order to
       work with this kernel.  Checked as soon as the comm buffer is
       mapped.  */
    struct {
        unsigned int real_time_primitives:1;
        unsigned int message_engine_in_kernel:1;
        unsigned int no_bus_locking:1;  /* One-way check -- if the kernel
                                           doesn't have this and the user
                                           does, that's an error.  */
    } kernel_configuration;
    volatile unsigned long send_ready;  /* A send(s) is ready to go.  */

    /* These first three members are constant after communications
       buffer initialization.  */
    unsigned long data_buffer_size;     /* Size of the data buffers.  */
    unsigned long local_node_address;   /* Local node number.  */
    FLIPC_address_t null_destination;   /* Local null destination value.  */

#if REAL_TIME_PRIMITIVES
    /* The scheduling policy used by the task initializing flipc for
       the allocations lock.  */
    int allocations_lock_policy;
#else
    /* A poor substitute for a kernel-level allocations lock.
       Note that this *cannot* be used as a regular simple lock;
       instead, try to acquire it, call sleep(1), try again, etc.
       Spinning on this lock will probably waste lots of cycles.  */
    flipc_simple_lock pail_alloc_lock;
#endif

    /* All of the members of these structures except for the free pointer
       are constant after initialization.  The free pointer is AIL owned
       and private.  */
    struct flipc_cb_type_ctl endpoint;
    struct flipc_cb_type_ctl epgroup;
    struct flipc_cb_type_ctl bufferlist;
    struct flipc_cb_type_ctl data_buffer;

    /* Global synchronization with the message engine.  On the KKT
       implementation we need one synchronizer for each thread.  */

    /* Send side: */
    volatile unsigned long sail_request_sync;   /* request_sync =
                                                   !request_sync when the
                                                   AIL wants to synchronize
                                                   with the CB.  */
    volatile unsigned long sme_respond_sync;    /* respond_sync =
                                                   !respond_sync when the ME
                                                   has noticed the sync
                                                   request.  By responding to
                                                   the sync, the ME is stating
                                                   that it has no
                                                   communications buffer state
                                                   that was cached previous to
                                                   it noticing the sync.  */

    /* Receive side: */
    volatile unsigned long sme_receive_in_progress;  /* Set by the ME before
                                                        it looks at any data
                                                        structures; cleared
                                                        afterwards.  A simple
                                                        spin in user space on
                                                        this variable will
                                                        suffice, as the time
                                                        the message engine
                                                        could be receiving
                                                        is low.  */

    /* Send endpoint list starts here.  Null if no send endpoints.  */
    volatile flipc_cb_ptr sail_send_endpoint_list;

    /* Keep track of whatever performance information we choose.  */
    struct FLIPC_domain_performance_info performance;

    /* Keep track of various kinds of error information here.  */
    struct FLIPC_domain_errors sme_error_log;

} *flipc_comm_buffer_ctl_t;
1124 | ||
1125 | ||
1126 | /* | |
1127 | * The communications buffer. | |
1128 | * | |
1129 | * The only restriction on the layout of the communications buffer is | |
1130 | * that the buffers themselves may not cross page boundaries. So we | |
1131 | * will place the data buffers at the end of the communications | |
1132 | * buffer, and the other objects at the beginning, and there may be a | |
1133 | * little bit of extra space in the middle. | |
1134 | * | |
1135 | * Note that this layout may change in future versions of FLIPC. | |
1136 | * | |
1137 | * +---------------------------+ | |
1138 | * | flipc_comm_buffer_ctl | | |
1139 | * +---------------------------+ | |
1140 | * | | | |
1141 | * | Endpoints | | |
1142 | * | | | |
1143 | * +---------------------------+ | |
1144 | * | | | |
1145 | * | Endpoint Groups | | |
1146 | * | | | |
1147 | * +---------------------------+ | |
1148 | * | | | |
1149 | * | Combined Buffer Lists | | |
1150 | * | | | |
1151 | * +---------------------------+ | |
1152 | * | | | |
1153 | * | (Possible empty space) | | |
1154 | * | | | |
1155 | * +---------------------------+ | |
1156 | * | | | |
1157 | * | Data Buffers | | |
1158 | * | | | |
1159 | * +---------------------------+ | |
1160 | */ | |
1161 | ||
/* Amount of address space the kernel reserves for the comm buffer;
   the AIL needs to know this to know how much to map.
   NOTE(review): the original comment said "number of pages", but
   0x100000 looks like a byte count (1MB) -- confirm against the
   kernel mapping code.  */
#define COMM_BUFFER_SIZE 0x100000
1165 | ||
/*
 * These variables are set, in a per-address-space context, to the base
 * and length of the communications buffer.  The ME needs to do bounds
 * checking to make sure it isn't overrunning anything.  Note that the
 * existence of these variables implies that an application will only
 * open a single domain.
 *
 * These declarations are duplicated in flipc/flipc_usermsg.h, and
 * should be kept in sync with that file.
 *
 * NOTE(review): these are tentative definitions (not "extern"
 * declarations) in a header, so every includer emits a common symbol;
 * this relies on common-symbol merging -- confirm the build permits it
 * (it is an error under -fno-common / C23).
 */
unsigned char *flipc_cb_base;
unsigned long flipc_cb_length;          /* In bytes.  */
1178 | ||
1179 | /* | |
1180 | * Following is a set of macros to convert back and forth between | |
1181 | * real address pointers and flipc_cb_ptr's for each data type. They | |
1182 | * rely on the flipc_cb_base being set correctly. | |
1183 | * | |
1184 | * A possible future improvement might be to have bounds checking occur | |
1185 | * inside these macros, but I'm not sure what I'd do if it failed. | |
1186 | */ | |
1187 | ||
/* Real address -> CB offset: easy going one way.  */
#define FLIPC_CBPTR(ptr) \
    (((unsigned char *) (ptr)) - flipc_cb_base)

/* CB offset -> real address: need to get the right types going the
   other way.  All rely on flipc_cb_base being set for this address
   space; no bounds checking is performed.  */
#define FLIPC_ENDPOINT_PTR(cb_ptr) \
    ((flipc_endpoint_t) ((cb_ptr) + flipc_cb_base))
#define FLIPC_EPGROUP_PTR(cb_ptr) \
    ((flipc_epgroup_t) ((cb_ptr) + flipc_cb_base))
#define FLIPC_DATA_BUFFER_PTR(cb_ptr) \
    ((flipc_data_buffer_t) ((cb_ptr) + flipc_cb_base))
#define FLIPC_BUFFERLIST_PTR(cb_ptr) \
    ((flipc_cb_ptr *) ((cb_ptr) + flipc_cb_base))
1201 | ||
1202 | ||
1203 | /* | |
1204 | * Flipc addresses. | |
1205 | * | |
1206 | * The addresses used by flipc for communication are defined in the | |
1207 | * user visible header file as unsigned longs. These macros pull that | |
1208 | * information apart for use of the FLIPC internal routines. | |
1209 | * | |
1210 | * I assume in the following that endpoints immediately follow the | |
1211 | * comm buffer control structure, because that makes indexing into | |
1212 | * them much easier. | |
1213 | */ | |
1214 | ||
/* Pack a (node, endpoint index) pair into a FLIPC address: node in the
   high bits, endpoint index in the low 16 bits.
   Fixes: "node" was previously expanded unparenthesized ("node << 16"),
   so an argument containing a lower-precedence operator (e.g. "a | b")
   mis-parsed; the cast to unsigned long also avoids signed-int overflow
   when node >= 0x8000 on 32-bit int platforms.  */
#define FLIPC_CREATE_ADDRESS(node, endpoint_idx) \
    ((((unsigned long) (node)) << 16) | (endpoint_idx))
#define FLIPC_ADDRESS_NODE(addr) (((unsigned long) (addr)) >> 16)
#define FLIPC_ADDRESS_ENDPOINT(addr) (((unsigned long) (addr)) & 0xffff)
1219 | ||
1220 | #endif /* _MACH_FLIPC_CB_H_ */ |