/*
 * Copyright (c) 2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <machine/asm.h>

/* This section has all the code necessary for the atomic operations supported by
 * the OSAtomicFifoEnqueue, OSAtomicFifoDequeue APIs in libplatform.
 *
 * This code needs to be compiled as one section and should not make branches
 * outside of this section. This allows us to copy the entire section to the
 * text comm page once it is created - see osfmk/arm/commpage/commpage.c
 *
 * This section is split into 2 parts - the preemption-free zone (PFZ) routines
 * and the preemptible routines (non-PFZ). The PFZ routines will not be
 * preempted by the scheduler if the pc of the userspace process is in that
 * region while handling asynchronous interrupts (note that traps are still
 * possible in the PFZ). Instead, the scheduler will mark x15 (known through
 * coordination with the functions in the commpage section) to indicate to the
 * userspace code that it needs to take a delayed preemption. The PFZ functions
 * may make callouts to preemptible routines and vice-versa. When a function
 * returns to a preemptible routine after a callout to a function in the PFZ, it
 * needs to check x15 to determine if a delayed preemption needs to be taken. In
 * addition, functions in the PFZ should not have backwards branches.
 *
 * The entry point to execute code in the commpage text section is through the
 * jump table at the very top of the section. The base of the jump table is
 * exposed to userspace via the APPLE array and the offsets from the base of the
 * jump table are listed in the arm/cpu_capabilities.h header. Adding any new
 * functions in the PFZ requires a lockstep change to the cpu_capabilities.h
 * header.
 *
 * Functions in PFZ:
 *	Enqueue function
 *	Dequeue function
 *
 * Functions not in PFZ:
 *	Backoff function as part of spin loop
 *	Preempt function to take delayed preemption as indicated by kernel
 *
 * ----------------------------------------------------------------------
 *
 * The high level goal of the asm code in this section is to enqueue and dequeue
 * from a FIFO linked list.
 *
 * typedef volatile struct {
 *	void *opaque1;	<-- ptr to first queue element or null
 *	void *opaque2;	<-- ptr to last queue element or null
 *	int opaque3;	<-- spinlock
 * } OSFifoQueueHead;
 *
 * This is done through a userspace spin lock stored in the linked list head
 * for synchronization.
 *
 * Here is the pseudocode for the spin lock acquire algorithm which is split
 * between the PFZ and the non-PFZ areas of the commpage text section. The
 * pseudocode here is just for the enqueue operation but it is symmetrical for
 * the dequeue operation.
 *
 * // Not in the PFZ. Entry from jump table.
 * ENQUEUE()
 *	enqueued = TRY_LOCK_AND_ENQUEUE(lock_addr);
 *	// We're running here after running the TRY_LOCK_AND_ENQUEUE code in
 *	// the PFZ so we need to check if we need to take a delayed
 *	// preemption.
 *	if (kernel_wants_to_preempt_us) {
 *		// This is done through the pfz_exit() mach trap which is a dummy
 *		// syscall whose sole purpose is to allow the thread to enter the
 *		// kernel so that it can be preempted at AST.
 *		enter_kernel_to_take_delayed_preemption()
 *	}
 *
 *	if (!enqueued) {
 *		ARM_MONITOR;
 *		WFE;
 *		enqueued = TRY_LOCK_AND_ENQUEUE(lock_addr);
 *		if (!enqueued) {
 *			// We failed twice, take a backoff
 *			BACKOFF();
 *			goto ENQUEUE()
 *		} else {
 *			// We got here from PFZ, check for delayed preemption
 *			if (kernel_wants_to_preempt_us) {
 *				enter_kernel_to_take_delayed_preemption()
 *			}
 *		}
 *	}
 *
 * // in PFZ
 * TRY_LOCK_AND_ENQUEUE():
 *	is_locked = try_lock(lock_addr);
 *	if (is_locked) {
 *		<do enqueue operation>
 *		return true
 *	} else {
 *		return false
 *	}
 *
 *
 * // Not in the PFZ
 * BACKOFF():
 *	// We're running here after running the TRY_LOCK_AND_ENQUEUE code in
 *	// the PFZ so we need to check if we need to take a delayed
 *	// preemption.
 *	if (kernel_wants_to_preempt_us) {
 *		enter_kernel_to_take_preemption()
 *	} else {
 *		// Note that it is safe to do this loop here since the entire
 *		// BACKOFF function isn't in the PFZ and so can be preempted at
 *		// any time
 *		do {
 *			lock_is_free = peek(lock_addr);
 *			if (lock_is_free) {
 *				return
 *			} else {
 *				pause_with_monitor(lock_addr)
 *			}
 *		} while (1)
 *	}
 */
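/*
 * For context, a minimal userspace sketch (not part of this file) of the
 * libplatform API these routines back, using the declarations from
 * <libkern/OSAtomic.h>; the element type and field names are illustrative:
 *
 *	#include <stddef.h>
 *	#include <libkern/OSAtomic.h>
 *
 *	struct elem {
 *		int data;
 *		struct elem *link;	// linkage field; its offset is passed to the API
 *	};
 *
 *	static OSFifoQueueHead head = OS_ATOMIC_FIFO_QUEUE_INIT;
 *
 *	void
 *	example(struct elem *e)
 *	{
 *		// Enqueue at the tail, then dequeue from the head (FIFO order)
 *		OSAtomicFifoEnqueue(&head, e, offsetof(struct elem, link));
 *		struct elem *out = OSAtomicFifoDequeue(&head, offsetof(struct elem, link));
 *		(void)out;
 *	}
 */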

/* Macros and helpers */

.macro BACKOFF lock_addr
	// Save registers we can't clobber
	stp	x0, x1, [sp, #-16]!
	stp	x2, x9, [sp, #-16]!

	// Pass in lock addr to backoff function
	mov	x0, \lock_addr
	bl	_backoff	// Jump out of the PFZ zone now

	// Restore registers
	ldp	x2, x9, [sp], #16
	ldp	x0, x1, [sp], #16
.endmacro

/* x0 = pointer to queue head
 * x1 = pointer to new elem to enqueue
 * x2 = offset of link field inside element
 * x3 = address of lock
 *
 * Moves the result of the helper function to the register specified
 */
.macro TRYLOCK_ENQUEUE result
	stp	x0, xzr, [sp, #-16]!	// Save x0 since it'll be clobbered by the return value

	bl	_pfz_trylock_and_enqueue
	mov	\result, x0

	ldp	x0, xzr, [sp], #16	// Restore saved registers
.endmacro

/* x0 = pointer to queue head
 * x1 = offset of link field inside element
 * x2 = address of lock
 *
 * Moves the result of the helper function to the register specified
 */
.macro TRYLOCK_DEQUEUE result
	stp	x0, xzr, [sp, #-16]!	// Save x0 since it'll be clobbered by the return value

	bl	_pfz_trylock_and_dequeue
	mov	\result, x0

	ldp	x0, xzr, [sp], #16	// Restore saved registers
.endmacro

/*
 * Takes a delayed preemption if needed and then branches to the label
 * specified.
 *
 * Modifies x15
 */
.macro PREEMPT_SELF_THEN branch_to_take_on_success
	cbz	x15, \branch_to_take_on_success	// No delayed preemption to take, just take the branch

	mov	x15, xzr	// Zero out the preemption pending field
	bl	_preempt_self
	b	\branch_to_take_on_success
.endmacro
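
/*
 * In C-like pseudocode (matching the style of the header comment above), the
 * handshake this macro implements is roughly:
 *
 *	if (delayed_preemption_pending) {	// kernel set x15 while we were in the PFZ
 *		delayed_preemption_pending = 0;	// clear x15 before trapping
 *		preempt_self();			// dummy syscall; kernel preempts us at AST
 *	}
 *	goto branch_to_take_on_success;
 */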

.section __TEXT_EXEC,__commpage_text,regular,pure_instructions

/* Preemption-free functions */
.align 2
_jump_table:	// 32-entry jump table, only 2 entries are used
	b	_pfz_enqueue
	b	_pfz_dequeue
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
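
/*
 * Each jump table slot is a single 4-byte branch instruction, so entry i
 * lives at (jump table base + 4 * i). A hedged sketch of how a userspace
 * caller might dispatch through the table - `commpage_text_base` is a
 * hypothetical name for the base address published via the APPLE array, and
 * the slot indices mirror the offsets in arm/cpu_capabilities.h:
 *
 *	typedef void (*pfz_enqueue_fn)(OSFifoQueueHead *, void *, size_t);
 *
 *	void
 *	call_enqueue(uintptr_t commpage_text_base,	// hypothetical: from the APPLE array
 *	    OSFifoQueueHead *q, void *elem, size_t off)
 *	{
 *		// Slot 0 is the enqueue entry; slot 1 would be dequeue
 *		pfz_enqueue_fn fn = (pfz_enqueue_fn)(commpage_text_base + 4 * 0);
 *		fn(q, elem, off);
 *	}
 */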


/*
 * typedef volatile struct {
 *	void *opaque1;	<-- ptr to first queue element or null
 *	void *opaque2;	<-- ptr to last queue element or null
 *	int opaque3;	<-- spinlock
 * } OSFifoQueueHead;
 */

/* Non-preemptible helper routine to FIFO enqueue:
 * int pfz_trylock_and_enqueue(OSFifoQueueHead *__list, void *__new, size_t __offset, uint32_t *lock_addr);
 *
 * x0 = pointer to queue head structure
 * x1 = pointer to new element to enqueue
 * x2 = offset of link field inside element
 * x3 = address of lock
 *
 * Only caller-saved registers (x9 - x15) are used in this function
 *
 * Returns 0 on success and a non-zero value on failure
 */
.globl _pfz_trylock_and_enqueue
.align 2
_pfz_trylock_and_enqueue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	mov	w10, wzr	// unlock value = w10 = 0
	mov	w11, #1		// locked value = w11 = 1

	// Try to grab the lock
	casa	w10, w11, [x3]	// Atomic CAS with acquire barrier
	cbz	w10, Ltrylock_enqueue_success

	mov	x0, #-1		// Failed
	b	Ltrylock_enqueue_exit

	/* We got the lock, enqueue the element */

Ltrylock_enqueue_success:
	ldr	x10, [x0, #8]	// x10 = tail of the queue
	cbnz	x10, Lnon_empty_queue	// tail not NULL
	str	x1, [x0]	// Set head to new element
	b	Lset_new_tail

Lnon_empty_queue:
	str	x1, [x10, x2]	// Set old tail -> offset = new elem

Lset_new_tail:
	str	x1, [x0, #8]	// Set tail = new elem

	// Drop the spin lock with a release barrier (pairs with acquire in casa)
	stlr	wzr, [x3]

	mov	x0, xzr		// Mark success

Ltrylock_enqueue_exit:
	POP_FRAME
	ARM64_STACK_EPILOG
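
/*
 * A C11 sketch of the routine above (illustrative only; the real code must be
 * forward-branch-only asm to live in the PFZ). The casa maps to an acquire
 * compare-and-swap and the stlr to a release store:
 *
 *	#include <stdatomic.h>
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	int
 *	trylock_and_enqueue(OSFifoQueueHead *q, void *new_elem, size_t offset,
 *	    _Atomic uint32_t *lock_addr)
 *	{
 *		uint32_t unlocked = 0;
 *		if (!atomic_compare_exchange_strong_explicit(lock_addr, &unlocked, 1,
 *		    memory_order_acquire, memory_order_relaxed)) {
 *			return -1;		// lock is held; caller will retry
 *		}
 *
 *		void *tail = q->opaque2;
 *		if (tail != NULL) {
 *			// Point the old tail's link field at the new element
 *			*(void **)((uintptr_t)tail + offset) = new_elem;
 *		} else {
 *			q->opaque1 = new_elem;	// empty queue: new element is also the head
 *		}
 *		q->opaque2 = new_elem;		// new element becomes the tail
 *
 *		atomic_store_explicit(lock_addr, 0, memory_order_release);
 *		return 0;
 *	}
 */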

/* Non-preemptible helper routine to FIFO dequeue:
 * void *pfz_trylock_and_dequeue(OSFifoQueueHead *__list, size_t __offset, uint32_t *lock_addr);
 *
 * x0 = pointer to queue head structure
 * x1 = offset of link field inside element
 * x2 = address of lock
 *
 * Only caller-saved registers (x9 - x15) are used in this function
 *
 * Returns -1 on failure, and the pointer on success (can be NULL)
 */
.globl _pfz_trylock_and_dequeue
.align 2
_pfz_trylock_and_dequeue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	// Try to grab the lock
	mov	w10, wzr	// unlock value = w10 = 0
	mov	w11, #1		// locked value = w11 = 1

	casa	w10, w11, [x2]	// Atomic CAS with acquire barrier
	cbz	w10, Ltrylock_dequeue_success

	mov	x0, #-1		// Failed
	b	Ltrylock_dequeue_exit

	/* We got the lock, dequeue the element */
Ltrylock_dequeue_success:
	ldr	x10, [x0]	// x10 = head of the queue
	cbz	x10, Lreturn_head	// if head is null, return

	ldr	x11, [x10, x1]	// get ptr to new head
	cbnz	x11, Lupdate_new_head	// If new head != NULL, then not a singleton. Only need to update head

	// Singleton case
	str	xzr, [x0, #8]	// dequeuing from a singleton queue, update tail to NULL

Lupdate_new_head:
	str	xzr, [x10, x1]	// zero the link in the old head
	str	x11, [x0]	// Set up a new head

Lreturn_head:
	mov	x0, x10		// Move head to x0
	stlr	wzr, [x2]	// Drop the spin lock with a release barrier (pairs with acquire in casa)

Ltrylock_dequeue_exit:
	POP_FRAME
	ARM64_STACK_EPILOG
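
/*
 * The list manipulation above as a C sketch (locking elided; same illustrative
 * caveats as the enqueue sketch). The subtle case is the singleton queue,
 * where the tail must be cleared along with the head:
 *
 *	void *
 *	dequeue_locked(OSFifoQueueHead *q, size_t offset)
 *	{
 *		void *head = q->opaque1;
 *		if (head == NULL) {
 *			return NULL;		// empty queue
 *		}
 *
 *		void *next = *(void **)((uintptr_t)head + offset);
 *		if (next == NULL) {
 *			q->opaque2 = NULL;	// singleton: clear the tail too
 *		}
 *		*(void **)((uintptr_t)head + offset) = NULL;	// unlink the old head
 *		q->opaque1 = next;		// next element (or NULL) becomes the head
 *		return head;
 *	}
 */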


/* Preemptible functions */
.private_extern _commpage_text_preemptible_functions
_commpage_text_preemptible_functions:


/*
 * void pfz_enqueue(OSFifoQueueHead *__list, void *__new, size_t __offset);
 * x0 = pointer to queue head
 * x1 = pointer to new elem to enqueue
 * x2 = offset of link field inside element
 */
.globl _pfz_enqueue

.align 2
_pfz_enqueue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	str	xzr, [x1, x2]	// Zero the forward link in the new element
	mov	x15, xzr	// Zero out the register used to communicate with kernel

	add	x3, x0, #16	// address of lock = x3 = x0 + 16
Lenqueue_trylock_loop:

	// Attempt #1
	TRYLOCK_ENQUEUE x9
	PREEMPT_SELF_THEN Lenqueue_determine_success

Lenqueue_determine_success:

	cbz	x9, Lenqueue_success	// did we succeed? if so, exit

	ldxr	w9, [x3]	// arm the monitor for the lock address
	cbz	w9, Lenqueue_clear_monitor	// lock is available, retry

	wfe			// Wait with the monitor armed

	// Attempt #2
	TRYLOCK_ENQUEUE x9
	cbz	x9, Lenqueue_take_delayed_preemption_upon_success	// did we succeed? if so, exit

	// We failed twice - backoff, then try again

	BACKOFF x3
	b	Lenqueue_trylock_loop

Lenqueue_clear_monitor:
	clrex			// Pairs with the ldxr

	// Take a preemption if needed, then branch to Lenqueue_trylock_loop
	PREEMPT_SELF_THEN Lenqueue_trylock_loop

Lenqueue_take_delayed_preemption_upon_success:
	PREEMPT_SELF_THEN Lenqueue_success

Lenqueue_success:
	POP_FRAME
	ARM64_STACK_EPILOG

/*
 * void *pfz_dequeue(OSFifoQueueHead *__list, size_t __offset);
 * x0 = pointer to queue head
 * x1 = offset of link field inside element
 *
 * This function is not in the PFZ but calls out to a helper which is in the
 * PFZ (_pfz_trylock_and_dequeue)
 */
.globl _pfz_dequeue
.align 2
_pfz_dequeue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	mov	x15, xzr	// Zero out the register used to communicate with kernel

	add	x2, x0, #16	// address of lock = x2 = x0 + 16
Ldequeue_trylock_loop:

	// Attempt #1
	TRYLOCK_DEQUEUE x9
	PREEMPT_SELF_THEN Ldequeue_determine_success

Ldequeue_determine_success:
	cmp	x9, #-1		// is the result of the dequeue == -1?
	b.ne	Ldequeue_success	// no, we succeeded

	ldxr	w9, [x2]	// arm the monitor for the lock address
	cbz	w9, Ldequeue_clear_monitor	// lock is available, retry

	wfe			// Wait with the monitor armed

	// Attempt #2
	TRYLOCK_DEQUEUE x9
	cmp	x9, #-1		// did we fail?
	b.ne	Ldequeue_take_delayed_preemption_upon_success	// no, we succeeded

	// We failed twice - backoff, then try again

	BACKOFF x2
	b	Ldequeue_trylock_loop

Ldequeue_take_delayed_preemption_upon_success:
	// We just got here after executing PFZ code, check if we need a preemption
	PREEMPT_SELF_THEN Ldequeue_success

Ldequeue_clear_monitor:
	clrex			// Pairs with the ldxr
	// Take a preemption if needed, then branch to Ldequeue_trylock_loop
	PREEMPT_SELF_THEN Ldequeue_trylock_loop

Ldequeue_success:
	mov	x0, x9		// Move x9 (where the result was stored earlier) to x0
	POP_FRAME
	ARM64_STACK_EPILOG


/* void preempt_self(void)
 *
 * Make a syscall to take a preemption. This function is not in the PFZ.
 */
.align 2
_preempt_self:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	// Save registers that will be clobbered by the mach trap on the stack
	// and keep the stack 16-byte aligned
	stp	x0, x1, [sp, #-16]!

	// Note: We don't need to save the caller-saved registers since the svc
	// will trigger an exception and the kernel will save and restore the
	// register state

	// Make a syscall to take the delayed preemption
	mov	x16, #-58	// -58 = pfz_exit
	svc	#0x80

	// Restore registers from the stack
	ldp	x0, x1, [sp], #16

	POP_FRAME
	ARM64_STACK_EPILOG

/*
 * void backoff(uint32_t *lock_addr);
 * The function returns when it observes that the lock has become available.
 * This function is not in the PFZ.
 *
 * x0 = lock address
 */
.align 2
.globl _backoff
_backoff:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	cbz	x15, Lno_preempt	// Kernel doesn't want to preempt us, jump to the loop

	mov	x15, xzr	// Zero out the preemption pending field
	bl	_preempt_self

Lno_preempt:
	ldxr	w9, [x0]	// Snoop on the lock and arm the monitor
	cbz	w9, Lend_backoff	// The lock seems to be available, return

	wfe			// pause

	b	Lno_preempt

Lend_backoff:
	clrex

	POP_FRAME
	ARM64_STACK_EPILOG
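
/*
 * A C approximation of the wait loop above, assuming clang's ACLE intrinsics
 * (__builtin_arm_ldrex / __builtin_arm_clrex and the __wfe() hint from
 * <arm_acle.h>). As in the asm, the load-exclusive arms the event monitor so
 * that the store which releases the lock wakes the wfe:
 *
 *	#include <arm_acle.h>
 *	#include <stdint.h>
 *
 *	void
 *	backoff_wait(uint32_t *lock_addr)
 *	{
 *		for (;;) {
 *			// Snoop on the lock and arm the monitor
 *			if (__builtin_arm_ldrex(lock_addr) == 0) {
 *				__builtin_arm_clrex();	// lock looks free; disarm and return
 *				return;
 *			}
 *			__wfe();			// sleep until an event or interrupt
 *		}
 *	}
 */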