/*
 * Copyright (c) 2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <machine/asm.h>
 
/*
 * This section has all the code necessary for the atomic operations supported
 * by the OSAtomicFifoEnqueue and OSAtomicFifoDequeue APIs in libplatform.
 *
 * This code needs to be compiled as one section and should not make branches
 * outside of this section. This allows us to copy the entire section to the
 * text comm page once it is created - see osfmk/arm/commpage/commpage.c.
 *
 * This section is split into 2 parts - the preemption-free zone (PFZ) routines
 * and the preemptible (non-PFZ) routines. The PFZ routines will not be
 * preempted by the scheduler if the pc of the userspace process is in that
 * region while handling asynchronous interrupts (note that traps are still
 * possible in the PFZ). Instead, the scheduler will mark x15 (known through
 * coordination with the functions in the commpage section) to indicate to the
 * userspace code that it needs to take a delayed preemption. The PFZ functions
 * may make callouts to preemptible routines and vice versa. When a function
 * returns to a preemptible routine after a callout to a function in the PFZ, it
 * needs to check x15 to determine if a delayed preemption needs to be taken. In
 * addition, functions in the PFZ should not have backwards branches.
 *
 * The entry point to execute code in the commpage text section is through the
 * jump table at the very top of the section. The base of the jump table is
 * exposed to userspace via the APPLE array and the offsets from the base of the
 * jump table are listed in the arm/cpu_capabilities.h header. Adding any new
 * functions in the PFZ requires a lockstep change to the cpu_capabilities.h
 * header.
 *
 * Functions in PFZ:
 *		_pfz_trylock_and_enqueue (FIFO enqueue with the lock held)
 *		_pfz_trylock_and_dequeue (FIFO dequeue with the lock held)
 *
 * Functions not in PFZ:
 *		_pfz_enqueue and _pfz_dequeue entry points
 *		Backoff function as part of spin loop
 *		Preempt function to take delayed preemption as indicated by kernel
 *
 * ----------------------------------------------------------------------
 *
 * The high level goal of the asm code in this section is to enqueue to and
 * dequeue from a FIFO linked list.
 
 * The list head has the following layout:
 *
 * typedef volatile struct {
 *		void *opaque1; <-- ptr to first queue element or null
 *		void *opaque2; <-- ptr to last queue element or null
 *		int opaque3; <-- spinlock
 * } OSFifoQueueHead;
 *
 * Enqueue and dequeue operations are synchronized through the userspace spin
 * lock stored in the list head (the opaque3 field).
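 *
 * For context, here is a minimal userspace usage sketch of these APIs
 * (illustrative only; it assumes the OSAtomicFifoEnqueue/OSAtomicFifoDequeue
 * declarations and the OS_ATOMIC_FIFO_QUEUE_INIT initializer from
 * <libkern/OSAtomic.h>):
 *
 *	#include <stddef.h>
 *	#include <libkern/OSAtomic.h>
 *
 *	typedef struct elem {
 *		int value;
 *		struct elem *link;	// forward link, managed by the queue
 *	} elem_t;
 *
 *	static OSFifoQueueHead head = OS_ATOMIC_FIFO_QUEUE_INIT;
 *
 *	void example(elem_t *e) {
 *		OSAtomicFifoEnqueue(&head, e, offsetof(elem_t, link));
 *		elem_t *first = OSAtomicFifoDequeue(&head, offsetof(elem_t, link));
 *		// first == e unless another thread raced with us
 *	}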
 
 * Here is the pseudocode for the spin lock acquire algorithm, which is split
 * between the PFZ and the non-PFZ areas of the commpage text section. The
 * pseudocode here is just for the enqueue operation but it is symmetrical for
 * the dequeue operation.
 *
 * // Not in the PFZ. Entry from jump table.
 * ENQUEUE():
 *	enqueued = TRY_LOCK_AND_ENQUEUE(lock_addr);
 *	// We're running here after running the TRY_LOCK_AND_ENQUEUE code in
 *	// the PFZ so we need to check if we need to take a delayed
 *	// preemption.
 *	if (kernel_wants_to_preempt_us) {
 *		// This is done through the pfz_exit() mach trap which is a dummy
 *		// syscall whose sole purpose is to allow the thread to enter the
 *		// kernel so that it can be preempted at AST.
 *		enter_kernel_to_take_delayed_preemption()
 *	}
 *
 *	while (!enqueued) {
 *		enqueued = TRY_LOCK_AND_ENQUEUE(lock_addr);
 *		if (!enqueued) {
 *			// We failed twice, take a backoff
 *			BACKOFF()
 *			// We got here from PFZ, check for delayed preemption
 *			if (kernel_wants_to_preempt_us) {
 *				enter_kernel_to_take_delayed_preemption()
 *			}
 *		}
 *	}
 *
 * // In the PFZ. Returns whether the lock was taken and the element enqueued.
 * TRY_LOCK_AND_ENQUEUE():
 *	is_locked = try_lock(lock_addr);
 *	if (is_locked) {
 *		<do enqueue operation>
 *		return true
 *	}
 *	return false
 *
 * // Not in the PFZ.
 * BACKOFF():
 *	// We're running here after running the TRY_LOCK_AND_ENQUEUE code in
 *	// the PFZ so we need to check if we need to take a delayed
 *	// preemption.
 *	if (kernel_wants_to_preempt_us) {
 *		enter_kernel_to_take_preemption()
 *	} else {
 *		// Note that it is safe to do this loop here since the entire
 *		// BACKOFF function isn't in the PFZ and so can be preempted at
 *		// any time.
 *		do {
 *			lock_is_free = peek(lock_addr);
 *			if (lock_is_free) {
 *				return
 *			} else {
 *				pause_with_monitor(lock_addr)
 *			}
 *		} while (1)
 *	}
 */
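/*
 * For illustration, the try_lock()/unlock() pair in the pseudocode maps onto
 * C11 atomics roughly as follows (a sketch, not part of this file; the asm
 * below uses casa/stlr directly):
 *
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *	#include <stdatomic.h>
 *
 *	static bool try_lock(_Atomic uint32_t *lock_addr) {
 *		uint32_t expected = 0;
 *		// casa: compare-and-swap with acquire semantics on success
 *		return atomic_compare_exchange_strong_explicit(lock_addr,
 *		    &expected, 1, memory_order_acquire, memory_order_relaxed);
 *	}
 *
 *	static void unlock(_Atomic uint32_t *lock_addr) {
 *		// stlr: store-release, pairs with the acquire above
 *		atomic_store_explicit(lock_addr, 0, memory_order_release);
 *	}
 */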
 
/* Macros and helpers */
 
.macro BACKOFF lock_addr
	// Save registers we can't clobber
	stp		x0, x1, [sp, #-16]!
	stp		x2, x9, [sp, #-16]!

	// Pass the lock addr to the backoff function
	mov		x0, \lock_addr
	bl		_backoff		// Jump out of the PFZ now

	// Restore saved registers
	ldp		x2, x9, [sp], #16
	ldp		x0, x1, [sp], #16
.endmacro
 
/* x0 = pointer to queue head
 * x1 = pointer to new elem to enqueue
 * x2 = offset of link field inside element
 * x3 = address of lock
 *
 * Moves the result of the helper function to the register specified
 */
.macro TRYLOCK_ENQUEUE result
	stp		x0, xzr, [sp, #-16]!	// Save x0 since it'll be clobbered by the return value

	bl		_pfz_trylock_and_enqueue
	mov		\result, x0		// Stash the result in the requested register

	ldp		x0, xzr, [sp], #16	// Restore saved registers
.endmacro
 
/* x0 = pointer to queue head
 * x1 = offset of link field inside element
 * x2 = address of lock
 *
 * Moves the result of the helper function to the register specified
 */
.macro TRYLOCK_DEQUEUE result
	stp		x0, xzr, [sp, #-16]!	// Save x0 since it'll be clobbered by the return value

	bl		_pfz_trylock_and_dequeue
	mov		\result, x0		// Stash the result in the requested register

	ldp		x0, xzr, [sp], #16	// Restore saved registers
.endmacro
 
/*
 * Takes a delayed preemption if needed and then branches to the label
 * specified.
 */
.macro PREEMPT_SELF_THEN branch_to_take_on_success
	cbz		x15, \branch_to_take_on_success	// No delayed preemption to take, just branch
	bl		_preempt_self	// Enter the kernel to take the delayed preemption
	mov		x15, xzr		// Zero out the preemption pending field
	b		\branch_to_take_on_success
.endmacro
 
	.section __TEXT_EXEC,__commpage_text,regular,pure_instructions

	/* Preemption-free functions */

_jump_table:				// 32 entry jump table, only 2 are used
	b		_pfz_enqueue	// entry for OSAtomicFifoEnqueue
	b		_pfz_dequeue	// entry for OSAtomicFifoDequeue
	.rept 30
	brk		#0				// unused entries trap
	.endr
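/*
 * Illustrative only: userspace never calls into this table by name. libplatform
 * reads the base address of this section from the APPLE array and adds the
 * per-function offsets defined in arm/cpu_capabilities.h. A hypothetical sketch
 * (commpage_text_base and FIFO_ENQUEUE_OFFSET are assumed names, not real
 * symbols):
 *
 *	typedef void (*fifo_enqueue_fn)(OSFifoQueueHead *, void *, size_t);
 *	fifo_enqueue_fn enq =
 *	    (fifo_enqueue_fn)(commpage_text_base + FIFO_ENQUEUE_OFFSET);
 *	enq(list, elem, offset);
 */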
 
/*
 * typedef volatile struct {
 *		void *opaque1; <-- ptr to first queue element or null
 *		void *opaque2; <-- ptr to last queue element or null
 *		int opaque3; <-- spinlock
 * } OSFifoQueueHead;
 */
 
/* Non-preemptible helper routine to FIFO enqueue:
 * int pfz_trylock_and_enqueue(OSFifoQueueHead *__list, void *__new, size_t __offset, uint32_t *lock_addr);
 *
 * x0 = pointer to queue head structure
 * x1 = pointer to new element to enqueue
 * x2 = offset of link field inside element
 * x3 = address of lock
 *
 * Only caller-saved registers (x9 - x15) are used in this function
 *
 * Returns 0 on success and a non-zero value on failure
 */
	.globl _pfz_trylock_and_enqueue
_pfz_trylock_and_enqueue:
	mov		w10, wzr		// unlock value = w10 = 0
	mov		w11, #1			// locked value = w11 = 1

	// Try to grab the lock
	casa	w10, w11, [x3]	// Atomic CAS with acquire barrier
	cbz		w10, Ltrylock_enqueue_success

	mov		x0, #-1			// Failed
	b		Ltrylock_enqueue_exit

	/* We got the lock, enqueue the element */
Ltrylock_enqueue_success:
	ldr		x10, [x0, #8]	// x10 = tail of the queue
	cbnz	x10, Lnon_empty_queue	// tail not NULL
	str		x1, [x0]		// Set head to new element
	b		Lset_tail

Lnon_empty_queue:
	str		x1, [x10, x2]	// Set old tail -> offset = new elem

Lset_tail:
	str		x1, [x0, #8]	// Set tail = new elem

	// Drop spin lock with release barrier (pairs with acquire in casa)
	stlr	wzr, [x3]
	mov		x0, xzr			// Mark success

Ltrylock_enqueue_exit:
	ret
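/*
 * Roughly equivalent C for the enqueue above (a readability sketch only; the
 * field names mirror the opaque1/opaque2/opaque3 layout described earlier):
 *
 *	int pfz_trylock_and_enqueue_c(OSFifoQueueHead *list, void *new_elem,
 *	    size_t offset, _Atomic uint32_t *lock_addr) {
 *		uint32_t expected = 0;
 *		if (!atomic_compare_exchange_strong_explicit(lock_addr, &expected,
 *		    1, memory_order_acquire, memory_order_relaxed)) {
 *			return -1;				// Lock is busy
 *		}
 *		void *tail = list->opaque2;
 *		if (tail == NULL) {
 *			list->opaque1 = new_elem;		// Empty queue: set head
 *		} else {
 *			*(void **)((char *)tail + offset) = new_elem;	// Link old tail
 *		}
 *		list->opaque2 = new_elem;			// New element becomes the tail
 *		atomic_store_explicit(lock_addr, 0, memory_order_release);
 *		return 0;
 *	}
 */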
 
/* Non-preemptible helper routine to FIFO dequeue:
 * void *pfz_trylock_and_dequeue(OSFifoQueueHead *__list, size_t __offset, uint32_t *lock_addr);
 *
 * x0 = pointer to queue head structure
 * x1 = offset of link field inside element
 * x2 = address of lock
 *
 * Only caller-saved registers (x9 - x15) are used in this function
 *
 * Returns -1 on failure, and the dequeued pointer on success (can be NULL if
 * the queue was empty)
 */
	.globl _pfz_trylock_and_dequeue
_pfz_trylock_and_dequeue:
	// Try to grab the lock
	mov		w10, wzr		// unlock value = w10 = 0
	mov		w11, #1			// locked value = w11 = 1
	casa	w10, w11, [x2]	// Atomic CAS with acquire barrier
	cbz		w10, Ltrylock_dequeue_success

	mov		x0, #-1			// Failed
	b		Ltrylock_dequeue_exit

	/* We got the lock, dequeue the element */
Ltrylock_dequeue_success:
	ldr		x10, [x0]		// x10 = head of the queue
	cbz		x10, Lreturn_head	// if head is null, return

	ldr		x11, [x10, x1]	// get ptr to the new head
	cbnz	x11, Lupdate_new_head	// If new head != NULL, not a singleton; only need to update head

	str		xzr, [x0, #8]	// dequeuing from a singleton queue, update tail to NULL

Lupdate_new_head:
	str		xzr, [x10, x1]	// zero the link in the old head
	str		x11, [x0]		// Set up the new head

Lreturn_head:
	mov		x0, x10			// Move head to x0
	stlr	wzr, [x2]		// Drop spin lock with release barrier (pairs with acquire in casa)

Ltrylock_dequeue_exit:
	ret
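/*
 * Roughly equivalent C for the dequeue above (again, a sketch only):
 *
 *	void *pfz_trylock_and_dequeue_c(OSFifoQueueHead *list, size_t offset,
 *	    _Atomic uint32_t *lock_addr) {
 *		uint32_t expected = 0;
 *		if (!atomic_compare_exchange_strong_explicit(lock_addr, &expected,
 *		    1, memory_order_acquire, memory_order_relaxed)) {
 *			return (void *)-1;			// Lock is busy
 *		}
 *		void *head = list->opaque1;
 *		if (head != NULL) {
 *			void **linkp = (void **)((char *)head + offset);
 *			void *new_head = *linkp;
 *			if (new_head == NULL) {
 *				list->opaque2 = NULL;	// Singleton queue: clear the tail too
 *			}
 *			*linkp = NULL;				// Zero the link in the old head
 *			list->opaque1 = new_head;
 *		}
 *		atomic_store_explicit(lock_addr, 0, memory_order_release);
 *		return head;					// NULL if the queue was empty
 *	}
 */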
 
/* Preemptible functions */

	.private_extern _commpage_text_preemptible_functions
_commpage_text_preemptible_functions:
 
/*
 * void pfz_enqueue(OSFifoQueueHead *__list, void *__new, size_t __offset);
 *
 * x0 = pointer to queue head
 * x1 = pointer to new elem to enqueue
 * x2 = offset of link field inside element
 *
 * This function is not in the PFZ but calls out to a helper which is in the PFZ
 * (_pfz_trylock_and_enqueue).
 */
	.globl _pfz_enqueue
_pfz_enqueue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	str		xzr, [x1, x2]	// Zero the forward link in the new element
	mov		x15, xzr		// zero out the register used to communicate with kernel

	add		x3, x0, #16		// address of lock = x3 = x0 + 16

Lenqueue_trylock_loop:
	TRYLOCK_ENQUEUE x9		// Result of the trylock-and-enqueue helper is in x9

	// We just came back from the PFZ, so check for a delayed preemption
	// before acting on the result
	PREEMPT_SELF_THEN Lenqueue_determine_success

Lenqueue_determine_success:
	cbz		x9, Lenqueue_success	// did we succeed? if so, exit

	ldxr	w9, [x3]		// arm the monitor for the lock address
	cbz		w9, Lenqueue_clear_monitor	// lock is available, retry.

	wfe						// Wait with monitor armed

	TRYLOCK_ENQUEUE x9		// Try again after being woken up
	cbz		x9, Lenqueue_take_delayed_preemption_upon_success	// did we succeed? if so, exit

	// We failed twice - backoff then try again
	BACKOFF x3
	b		Lenqueue_trylock_loop

Lenqueue_clear_monitor:
	clrex					// Pairs with the ldxr

	// Take a preemption if needed then branch to Lenqueue_trylock_loop
	PREEMPT_SELF_THEN Lenqueue_trylock_loop

Lenqueue_take_delayed_preemption_upon_success:
	PREEMPT_SELF_THEN Lenqueue_success

Lenqueue_success:
	POP_FRAME
	ARM64_STACK_EPILOG
 
/*
 * void *pfz_dequeue(OSFifoQueueHead *__list, size_t __offset);
 *
 * x0 = pointer to queue head
 * x1 = offset of link field inside element
 *
 * This function is not in the PFZ but calls out to a helper which is in the PFZ
 * (_pfz_trylock_and_dequeue).
 */
	.globl _pfz_dequeue
_pfz_dequeue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	mov		x15, xzr		// zero out the register used to communicate with kernel

	add		x2, x0, #16		// address of lock = x2 = x0 + 16

Ldequeue_trylock_loop:
	TRYLOCK_DEQUEUE x9		// Result of the trylock-and-dequeue helper is in x9

	// We just came back from the PFZ, so check for a delayed preemption
	// before acting on the result
	PREEMPT_SELF_THEN Ldequeue_determine_success

Ldequeue_determine_success:
	cmp		x9, #-1			// is the result of the dequeue == -1?
	b.ne	Ldequeue_success	// no, we succeeded

	ldxr	w9, [x2]		// arm the monitor for the lock address
	cbz		w9, Ldequeue_clear_monitor	// lock is available, retry.

	wfe						// Wait with monitor armed

	TRYLOCK_DEQUEUE x9		// Try again after being woken up
	cmp		x9, #-1			// did we fail?
	b.ne	Ldequeue_take_delayed_preemption_upon_success	// no, we succeeded

	// We failed twice - backoff then try again
	BACKOFF x2
	b		Ldequeue_trylock_loop

Ldequeue_take_delayed_preemption_upon_success:
	// We just got here after executing PFZ code, check if we need a preemption
	PREEMPT_SELF_THEN Ldequeue_success

Ldequeue_clear_monitor:
	clrex					// Pairs with the ldxr

	// Take a preemption if needed then branch to Ldequeue_trylock_loop
	PREEMPT_SELF_THEN Ldequeue_trylock_loop

Ldequeue_success:
	mov		x0, x9			// Move x9 (where the result was stored earlier) to x0
	POP_FRAME
	ARM64_STACK_EPILOG
 
/* void preempt_self(void)
 *
 * Make a syscall to take a preemption. This function is not in the PFZ.
 */
_preempt_self:
	// Save the registers that will be clobbered by the mach trap on the
	// stack, keeping it 16-byte aligned
	stp		x0, x1, [sp, #-16]!

	// Note: We don't need to save the caller-saved registers since svc will
	// trigger an exception and the kernel will save and restore the register
	// state

	// Make the syscall to take the delayed preemption
	mov		x16, #-58		// -58 = pfz_exit mach trap number
	svc		#0x80

	// Restore registers from the stack
	ldp		x0, x1, [sp], #16
	ret
 
/*
 * void backoff(uint32_t *lock_addr);
 *
 * The function returns when it observes that the lock has become available.
 * This function is not in the PFZ.
 *
 * x0 = address of lock
 */
	.globl _backoff
_backoff:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	cbz		x15, Lno_preempt	// Kernel doesn't want to preempt us, jump to the wait loop

	bl		_preempt_self	// Take the delayed preemption
	mov		x15, xzr		// zero out the preemption pending field

Lno_preempt:
	ldxr	w9, [x0]		// Snoop on the lock and arm the monitor
	cbz		w9, Lend_backoff	// The lock seems to be available, return

	wfe						// Wait with monitor armed
	b		Lno_preempt

Lend_backoff:
	clrex					// Pairs with the ldxr
	POP_FRAME
	ARM64_STACK_EPILOG
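/*
 * For reference, the ldxr/wfe/clrex pattern above corresponds to the
 * pause_with_monitor() step in the pseudocode at the top of this file. A
 * hedged C sketch using compiler inline asm (illustrative only, not part of
 * this file):
 *
 *	static void wait_until_lock_free(volatile uint32_t *lock_addr) {
 *		uint32_t v;
 *		for (;;) {
 *			// Load-exclusive arms the local exclusive monitor
 *			__asm__ volatile("ldxr %w0, %1"
 *			    : "=r"(v) : "Q"(*lock_addr) : "memory");
 *			if (v == 0) {
 *				// Lock observed free; disarm the monitor and return
 *				__asm__ volatile("clrex");
 *				return;
 *			}
 *			// Sleep until an event, e.g. a write to the monitored line
 *			__asm__ volatile("wfe");
 *		}
 *	}
 */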