/*
 * Copyright (c) 2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <machine/asm.h>

/* This section has all the code necessary for the atomic operations supported by
 * the OSAtomicFifoEnqueue and OSAtomicFifoDequeue APIs in libplatform.
 *
 * This code needs to be compiled as one section and should not make branches
 * outside of this section. This allows us to copy the entire section to the
 * text comm page once it is created - see osfmk/arm/commpage/commpage.c
 *
 * This section is split into 2 parts - the preemption-free zone (PFZ) routines
 * and the preemptible routines (non-PFZ). The PFZ routines will not be
 * preempted by the scheduler if the pc of the userspace process is in that
 * region while handling asynchronous interrupts (note that traps are still
 * possible in the PFZ). Instead, the scheduler will mark x15 (known through
 * coordination with the functions in the commpage section) to indicate to the
 * userspace code that it needs to take a delayed preemption. The PFZ functions
 * may make callouts to preemptible routines and vice-versa. When a function
 * returns to a preemptible routine after a callout to a function in the PFZ, it
 * needs to check x15 to determine if a delayed preemption needs to be taken. In
 * addition, functions in the PFZ should not have backwards branches.
 *
 * The entry point to execute code in the commpage text section is through the
 * jump table at the very top of the section. The base of the jump table is
 * exposed to userspace via the APPLE array and the offsets from the base of the
 * jump table are listed in the arm/cpu_capabilities.h header. Adding any new
 * functions in the PFZ requires a lockstep change to the cpu_capabilities.h
 * header.
 *
 * Functions in PFZ:
 *	Enqueue function
 *	Dequeue function
 *
 * Functions not in PFZ:
 *	Backoff function as part of spin loop
 *	Preempt function to take delayed preemption as indicated by kernel
 *
 * ----------------------------------------------------------------------
 *
 * The high level goal of the asm code in this section is to enqueue and dequeue
 * from a FIFO linked list whose head has the following layout:
 *
 *	typedef volatile struct {
 *		void *opaque1;   <-- ptr to first queue element or null
 *		void *opaque2;   <-- ptr to last queue element or null
 *		int   opaque3;   <-- spinlock
 *	} OSFifoQueueHead;
 *
 * This is done through a userspace spin lock stored in the linked list head
 * for synchronization.
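 *
 * For context, userspace reaches this machinery through the OSAtomicFifoEnqueue
 * and OSAtomicFifoDequeue wrappers declared in <libkern/OSAtomic.h>. A minimal
 * usage sketch follows; the element type and field names below are illustrative
 * only and not part of any API:
 *
 *	#include <libkern/OSAtomic.h>
 *	#include <stddef.h>
 *
 *	struct elem {
 *		long  data;
 *		void *link;    // linkage pointer owned by the queue
 *	};
 *
 *	OSFifoQueueHead head = OS_ATOMIC_FIFO_QUEUE_INIT;
 *	struct elem e = { .data = 42 };
 *
 *	OSAtomicFifoEnqueue(&head, &e, offsetof(struct elem, link));
 *	struct elem *first = OSAtomicFifoDequeue(&head, offsetof(struct elem, link));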
 *
 * Here is the pseudocode for the spin lock acquire algorithm which is split
 * between the PFZ and the non-PFZ areas of the commpage text section. The
 * pseudocode here is just for the enqueue operation but it is symmetrical for
 * the dequeue operation.
 *
 *	// Not in the PFZ. Entry from jump table.
 *	ENQUEUE()
 *		enqueued = TRY_LOCK_AND_ENQUEUE(lock_addr);
 *		// We're running here after running the TRY_LOCK_AND_ENQUEUE code in
 *		// the PFZ so we need to check if we need to take a delayed
 *		// preemption.
 *		if (kernel_wants_to_preempt_us) {
 *			// This is done through the pfz_exit() mach trap which is a dummy
 *			// syscall whose sole purpose is to allow the thread to enter the
 *			// kernel so that it can be preempted at AST.
 *			enter_kernel_to_take_delayed_preemption()
 *		}
 *
 *		if (!enqueued) {
 *			ARM_MONITOR;
 *			WFE;
 *			enqueued = TRY_LOCK_AND_ENQUEUE(lock_addr);
 *			if (!enqueued) {
 *				// We failed twice, take a backoff
 *				BACKOFF();
 *				goto ENQUEUE()
 *			} else {
 *				// We got here from PFZ, check for delayed preemption
 *				if (kernel_wants_to_preempt_us) {
 *					enter_kernel_to_take_delayed_preemption()
 *				}
 *			}
 *		}
 *
 *	// In the PFZ
 *	TRY_LOCK_AND_ENQUEUE():
 *		is_locked = try_lock(lock_addr);
 *		if (is_locked) {
 *			<do the enqueue and drop the lock>
 *			return true
 *		} else {
 *			return false
 *		}
 *
 *	// Not in the PFZ
 *	BACKOFF():
 *		// We're running here after running the TRY_LOCK_AND_ENQUEUE code in
 *		// the PFZ so we need to check if we need to take a delayed
 *		// preemption.
 *		if (kernel_wants_to_preempt_us) {
 *			enter_kernel_to_take_preemption()
 *		} else {
 *			// Note that it is safe to do this loop here since the entire
 *			// BACKOFF function isn't in the PFZ and so can be preempted at any
 *			// time
 *			do {
 *				lock_is_free = peek(lock_addr);
 *				if (lock_is_free) {
 *					return
 *				} else {
 *					pause_with_monitor(lock_addr)
 *				}
 *			} while (1)
 *		}
 */

/* Macros and helpers */

.macro BACKOFF lock_addr
	// Save registers we can't clobber
	stp	x0, x1, [sp, #-16]!
	stp	x2, x9, [sp, #-16]!

	// Pass the lock address to the backoff function
	mov	x0, \lock_addr
	bl	_backoff		// Jump out of the PFZ now

	// Restore registers
	ldp	x2, x9, [sp], #16
	ldp	x0, x1, [sp], #16
.endmacro

/* x0 = pointer to queue head
 * x1 = pointer to new elem to enqueue
 * x2 = offset of link field inside element
 * x3 = address of lock
 *
 * Moves the result of the helper function to the register specified
 */
.macro TRYLOCK_ENQUEUE result
	stp	x0, xzr, [sp, #-16]!	// Save x0 since it'll be clobbered by the return value

	bl	_pfz_trylock_and_enqueue
	mov	\result, x0

	ldp	x0, xzr, [sp], #16	// Restore saved registers
.endmacro

/* x0 = pointer to queue head
 * x1 = offset of link field inside element
 * x2 = address of lock
 *
 * Moves the result of the helper function to the register specified
 */
.macro TRYLOCK_DEQUEUE result
	stp	x0, xzr, [sp, #-16]!	// Save x0 since it'll be clobbered by the return value

	bl	_pfz_trylock_and_dequeue
	mov	\result, x0

	ldp	x0, xzr, [sp], #16	// Restore saved registers
.endmacro

/*
 * Takes a delayed preemption if needed and then branches to the label
 * specified.
 *
 * Modifies x15
 */
.macro PREEMPT_SELF_THEN branch_to_take_on_success
	cbz	x15, \branch_to_take_on_success	// No delayed preemption to take, just branch

	mov	x15, xzr		// zero out the preemption pending field
	bl	_preempt_self
	b	\branch_to_take_on_success
.endmacro
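
/*
 * The PFZ helpers below take the queue's spinlock with a 0 -> 1 compare-and-swap
 * with acquire semantics (casa) and drop it with a release store (stlr). For
 * reference, a C11 <stdatomic.h> sketch of the same pairing; this is purely
 * illustrative, the function names below do not exist anywhere in libplatform:
 *
 *	#include <stdatomic.h>
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *
 *	static bool
 *	fifo_trylock(_Atomic uint32_t *lock)
 *	{
 *		uint32_t expected = 0;
 *		// acquire on success pairs with the release store in fifo_unlock
 *		return atomic_compare_exchange_strong_explicit(lock, &expected, 1,
 *		    memory_order_acquire, memory_order_relaxed);
 *	}
 *
 *	static void
 *	fifo_unlock(_Atomic uint32_t *lock)
 *	{
 *		atomic_store_explicit(lock, 0, memory_order_release);
 *	}
 */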

.section __TEXT_EXEC,__commpage_text,regular,pure_instructions

/* Preemption-free functions */

	.align 2
_jump_table:		// 32 entry jump table, only 2 are used
	b	_pfz_enqueue
	b	_pfz_dequeue
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666

/*
 * typedef volatile struct {
 *	void *opaque1;   <-- ptr to first queue element or null
 *	void *opaque2;   <-- ptr to last queue element or null
 *	int   opaque3;   <-- spinlock
 * } OSFifoQueueHead;
 */

/* Non-preemptible helper routine to FIFO enqueue:
 * int pfz_trylock_and_enqueue(OSFifoQueueHead *__list, void *__new, size_t __offset, uint32_t *lock_addr);
 *
 * x0 = pointer to queue head structure
 * x1 = pointer to new element to enqueue
 * x2 = offset of link field inside element
 * x3 = address of lock
 *
 * Only caller-saved registers (x9 - x15) are used in this function
 *
 * Returns 0 on success and a non-zero value on failure
 */
	.globl _pfz_trylock_and_enqueue
	.align 2
_pfz_trylock_and_enqueue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	mov	w10, wzr		// unlock value = w10 = 0
	mov	w11, #1			// locked value = w11 = 1

	// Try to grab the lock
	casa	w10, w11, [x3]		// Atomic CAS with acquire barrier
	cbz	w10, Ltrylock_enqueue_success

	mov	x0, #-1			// Failed
	b	Ltrylock_enqueue_exit

	/* We got the lock, enqueue the element */
Ltrylock_enqueue_success:
	ldr	x10, [x0, #8]		// x10 = tail of the queue
	cbnz	x10, Lnon_empty_queue	// tail not NULL
	str	x1, [x0]		// Set head to new element
	b	Lset_new_tail

Lnon_empty_queue:
	str	x1, [x10, x2]		// Set old tail -> offset = new elem

Lset_new_tail:
	str	x1, [x0, #8]		// Set tail = new elem

	// Drop spin lock with release barrier (pairs with acquire in casa)
	stlr	wzr, [x3]

	mov	x0, xzr			// Mark success

Ltrylock_enqueue_exit:
	POP_FRAME
	ARM64_STACK_EPILOG

/* Non-preemptible helper routine to FIFO dequeue:
 * void *pfz_trylock_and_dequeue(OSFifoQueueHead *__list, size_t __offset, uint32_t *lock_addr);
 *
 * x0 = pointer to queue head structure
 * x1 = offset of link field inside element
 * x2 = address of lock
 *
 * Only caller-saved registers (x9 - x15) are used in this function
 *
 * Returns -1 on failure, and the dequeued pointer on success (which can be NULL
 * if the queue was empty)
 */
	.globl _pfz_trylock_and_dequeue
	.align 2
_pfz_trylock_and_dequeue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	// Try to grab the lock
	mov	w10, wzr		// unlock value = w10 = 0
	mov	w11, #1			// locked value = w11 = 1
	casa	w10, w11, [x2]		// Atomic CAS with acquire barrier
	cbz	w10, Ltrylock_dequeue_success

	mov	x0, #-1			// Failed
	b	Ltrylock_dequeue_exit

	/* We got the lock, dequeue the element */
Ltrylock_dequeue_success:
	ldr	x10, [x0]		// x10 = head of the queue
	cbz	x10, Lreturn_head	// if head is null, return

	ldr	x11, [x10, x1]		// get ptr to new head
	cbnz	x11, Lupdate_new_head	// If new head != NULL, then not a singleton; only need to update head

	// Singleton case
	str	xzr, [x0, #8]		// dequeuing from a singleton queue, update tail to NULL

Lupdate_new_head:
	str	xzr, [x10, x1]		// zero the link in the old head
	str	x11, [x0]		// Set up the new head

Lreturn_head:
	mov	x0, x10			// Move head to x0
	stlr	wzr, [x2]		// Drop spin lock with release barrier (pairs with acquire in casa)

Ltrylock_dequeue_exit:
	POP_FRAME
	ARM64_STACK_EPILOG

/* Preemptible functions */

.private_extern _commpage_text_preemptible_functions
_commpage_text_preemptible_functions:

/*
 * void pfz_enqueue(OSFifoQueueHead *__list, void *__new, size_t __offset);
 *
 * x0 = pointer to queue head
 * x1 = pointer to new elem to enqueue
 * x2 = offset of link field inside element
 */
	.globl _pfz_enqueue
	.align 2
_pfz_enqueue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	str	xzr, [x1, x2]		// Zero the forward link in the new element
	mov	x15, xzr		// zero out the register used to communicate with kernel

	add	x3, x0, #16		// address of lock = x3 = x0 + 16

Lenqueue_trylock_loop:
	// Attempt #1
	TRYLOCK_ENQUEUE x9
	PREEMPT_SELF_THEN Lenqueue_determine_success

Lenqueue_determine_success:
	cbz	x9, Lenqueue_success		// did we succeed? if so, exit

	ldxr	w9, [x3]			// arm the monitor for the lock address
	cbz	w9, Lenqueue_clear_monitor	// lock is available, retry

	wfe					// Wait with monitor armed

	// Attempt #2
	TRYLOCK_ENQUEUE x9
	cbz	x9, Lenqueue_take_delayed_preemption_upon_success	// did we succeed? if so, exit

	// We failed twice - backoff then try again
	BACKOFF x3
	b	Lenqueue_trylock_loop

Lenqueue_clear_monitor:
	clrex		// Pairs with the ldxr

	// Take a preemption if needed then branch to Lenqueue_trylock_loop
	PREEMPT_SELF_THEN Lenqueue_trylock_loop

Lenqueue_take_delayed_preemption_upon_success:
	PREEMPT_SELF_THEN Lenqueue_success

Lenqueue_success:
	POP_FRAME
	ARM64_STACK_EPILOG

/*
 * void *pfz_dequeue(OSFifoQueueHead *__list, size_t __offset);
 *
 * x0 = pointer to queue head
 * x1 = offset of link field inside element
 *
 * This function is not in the PFZ but calls out to a helper which is in the PFZ
 * (_pfz_trylock_and_dequeue)
 */
	.globl _pfz_dequeue
	.align 2
_pfz_dequeue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	mov	x15, xzr		// zero out the register used to communicate with kernel

	add	x2, x0, #16		// address of lock = x2 = x0 + 16

Ldequeue_trylock_loop:
	// Attempt #1
	TRYLOCK_DEQUEUE x9
	PREEMPT_SELF_THEN Ldequeue_determine_success

Ldequeue_determine_success:
	cmp	x9, #-1				// is result of dequeue == -1?
	b.ne	Ldequeue_success		// no, we succeeded

	ldxr	w9, [x2]			// arm the monitor for the lock address
	cbz	w9, Ldequeue_clear_monitor	// lock is available, retry

	wfe					// Wait with monitor armed

	// Attempt #2
	TRYLOCK_DEQUEUE x9
	cmp	x9, #-1				// did we fail?
	b.ne	Ldequeue_take_delayed_preemption_upon_success	// no, we succeeded

	// We failed twice - backoff then try again
	BACKOFF x2
	b	Ldequeue_trylock_loop

Ldequeue_take_delayed_preemption_upon_success:
	// We just got here after executing PFZ code, check if we need a preemption
	PREEMPT_SELF_THEN Ldequeue_success

Ldequeue_clear_monitor:
	clrex		// Pairs with the ldxr

	// Take a preemption if needed then branch to Ldequeue_trylock_loop
	PREEMPT_SELF_THEN Ldequeue_trylock_loop

Ldequeue_success:
	mov	x0, x9		// Move x9 (where the result was stored earlier) to x0
	POP_FRAME
	ARM64_STACK_EPILOG
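
/*
 * pfz_enqueue and pfz_dequeue above compute the lock address as queue head + 16,
 * i.e. they rely on the spinlock word (opaque3) sitting at byte offset 16 of
 * OSFifoQueueHead in the LP64 layout (two 8-byte pointers precede it). A
 * hypothetical compile-time check of that layout assumption, in illustrative C
 * that is not part of this file, could read:
 *
 *	#include <stddef.h>
 *	#include <libkern/OSAtomic.h>
 *
 *	_Static_assert(offsetof(OSFifoQueueHead, opaque3) == 16,
 *	    "commpage FIFO code assumes the lock word at offset 16");
 */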
/* void preempt_self(void)
 *
 * Make a syscall to take a preemption. This function is not in the PFZ.
 */
	.align 2
_preempt_self:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	// Save the registers that will be clobbered by the mach trap on the
	// stack and keep it 16 byte aligned
	stp	x0, x1, [sp, #-16]!

	// Note: We don't need to save the other caller-saved registers since the
	// svc will trigger an exception and the kernel will save and restore the
	// register state

	// Make syscall to take delayed preemption
	mov	x16, #-58	// -58 = pfz_exit mach trap
	svc	#0x80

	// Restore registers from stack
	ldp	x0, x1, [sp], #16

	POP_FRAME
	ARM64_STACK_EPILOG

/*
 * void backoff(uint32_t *lock_addr);
 *
 * The function returns when it observes that the lock has become available.
 * This function is not in the PFZ.
 *
 * x0 = lock address
 */
	.align 2
	.globl _backoff
_backoff:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	cbz	x15, Lno_preempt	// Kernel doesn't want to preempt us, jump to the spin loop

	mov	x15, xzr		// zero out the preemption pending field
	bl	_preempt_self

Lno_preempt:
	ldxr	w9, [x0]		// Snoop on the lock and arm the monitor
	cbz	w9, Lend_backoff	// The lock seems to be available, return

	wfe				// pause until the monitor sees an update

	b	Lno_preempt

Lend_backoff:
	clrex

	POP_FRAME
	ARM64_STACK_EPILOG