/*
 * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/appleapiopts.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>
#include <mach/i386/syscall_sw.h>

/* PREEMPTION FREE ZONE (PFZ)
 *
 * A portion of the commpage is special-cased by the kernel to be "preemption free",
 * i.e. as if we had disabled interrupts in user mode.  This facilitates writing
 * "nearly-lockless" code, for example code that must be serialized by a spinlock but
 * which we do not want to preempt while the spinlock is held.
 *
 * The PFZ is implemented by collecting all the "preemption-free" code into a single
 * contiguous region of the commpage.  Register %ebx is used as a flag register;
 * before entering the PFZ, %ebx is cleared.  If some event occurs that would normally
 * result in a preemption while in the PFZ, the kernel sets %ebx nonzero instead of
 * preempting.  Then, when the routine leaves the PFZ, we check %ebx and
 * if nonzero execute a special "pfz_exit" syscall to take the delayed preemption.
 *
 * PFZ code must bound the amount of time spent in the PFZ, in order to control
 * latency.  Backward branches are dangerous and must not be used in a way that
 * could inadvertently create a long-running loop.
 *
 * Because they cannot be implemented reasonably without a lock, we put the "atomic"
 * FIFO enqueue and dequeue in the PFZ.  As long as we don't take a page fault trying
 * to access queue elements, these implementations behave nearly-locklessly.
 * But we still must take a spinlock to serialize, and in case of page faults.
 */

/*
 * typedef volatile struct {
 *	void	*opaque1;	<-- ptr to first queue element or null
 *	void	*opaque2;	<-- ptr to last queue element or null
 *	int	 opaque3;	<-- spinlock
 * } OSFifoQueueHead;
 *
 * void OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset);
 */
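/*
 * For orientation, here is a minimal user-level sketch of how these
 * primitives are reached through the public <libkern/OSAtomic.h> wrappers.
 * This is illustrative only and not part of the commpage; it assumes the
 * OSAtomicFifoEnqueue/OSAtomicFifoDequeue prototypes above and the
 * OS_ATOMIC_FIFO_QUEUE_INIT initializer from that header:
 *
 *	#include <libkern/OSAtomic.h>
 *	#include <stddef.h>
 *
 *	typedef struct elem {
 *		int		data;
 *		struct elem	*link;	// forward link; its offset is passed below
 *	} elem_t;
 *
 *	static OSFifoQueueHead q = OS_ATOMIC_FIFO_QUEUE_INIT;
 *
 *	void produce(elem_t *e) {
 *		OSAtomicFifoEnqueue(&q, e, offsetof(elem_t, link));
 *	}
 *
 *	elem_t *consume(void) {	// returns NULL if the queue is empty
 *		return OSAtomicFifoDequeue(&q, offsetof(elem_t, link));
 *	}
 */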
COMMPAGE_FUNCTION_START(AtomicFifoEnqueue, 32, 4)
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	xorl	%ebx,%ebx		// clear "preemption pending" flag
	movl	16(%esp),%edi		// %edi == ptr to list head
	movl	20(%esp),%esi		// %esi == new
	movl	24(%esp),%edx		// %edx == offset
	COMMPAGE_CALL(_COMM_PAGE_PFZ_ENQUEUE,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue)
	testl	%ebx,%ebx		// pending preemption?
	jz	1f
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue)
1:
	popl	%ebx
	popl	%esi
	popl	%edi
	ret
COMMPAGE_DESCRIPTOR(AtomicFifoEnqueue,_COMM_PAGE_FIFO_ENQUEUE,0,0)


/* void* OSAtomicFifoDequeue( OSFifoQueueHead *list, size_t offset); */

COMMPAGE_FUNCTION_START(AtomicFifoDequeue, 32, 4)
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	xorl	%ebx,%ebx		// clear "preemption pending" flag
	movl	16(%esp),%edi		// %edi == ptr to list head
	movl	20(%esp),%edx		// %edx == offset
	COMMPAGE_CALL(_COMM_PAGE_PFZ_DEQUEUE,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue)
	testl	%ebx,%ebx		// pending preemption?
	jz	1f
	pushl	%eax			// save return value across sysenter
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue)
	popl	%eax
1:
	popl	%ebx
	popl	%esi
	popl	%edi
	ret				// ptr to 1st element in Q still in %eax
COMMPAGE_DESCRIPTOR(AtomicFifoDequeue,_COMM_PAGE_FIFO_DEQUEUE,0,0)


/* Subroutine to make a preempt syscall.  Called when we notice %ebx is
 * nonzero after returning from a PFZ subroutine.
 * When we enter kernel:
 *	%edx = return address
 *	%ecx = stack ptr
 * Destroys %eax, %ecx, and %edx.
 */
COMMPAGE_FUNCTION_START(preempt, 32, 4)
	popl	%edx			// get return address
	movl	%esp,%ecx		// save stack ptr here
	movl	$(-58),%eax		/* 58 = pfz_exit */
	xorl	%ebx,%ebx		// clear "preemption pending" flag
	sysenter
COMMPAGE_DESCRIPTOR(preempt,_COMM_PAGE_PREEMPT,0,0)


/* Subroutine to back off if we cannot get the spinlock.  Called
 * after a few attempts inline in the PFZ subroutines.  This code is
 * not in the PFZ.
 *	%edi = ptr to queue head structure
 *	%ebx = preemption flag (nonzero if preemption pending)
 * Destroys %eax.
 */
COMMPAGE_FUNCTION_START(backoff, 32, 4)
	testl	%ebx,%ebx		// does kernel want to preempt us?
	jz	1f			// no
	xorl	%ebx,%ebx		// yes, clear flag
	pushl	%edx			// preserve regs used by preempt syscall
	pushl	%ecx
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff)
	popl	%ecx
	popl	%edx
1:
	pause				// SMT-friendly backoff
	cmpl	$0,8(%edi)		// sniff the lockword
	jnz	1b			// loop if still taken
	ret				// lockword is free, so reenter PFZ
COMMPAGE_DESCRIPTOR(backoff,_COMM_PAGE_BACKOFF,0,0)


/* Preemption-free-zone routine to FIFO Enqueue:
 *	%edi = ptr to queue head structure
 *	%esi = ptr to element to enqueue
 *	%edx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 */
COMMPAGE_FUNCTION_START(pfz_enqueue, 32, 4)
	movl	$0,(%edx,%esi)		// zero forward link in new element
1:
	xorl	%eax, %eax
	orl	$-1, %ecx
	lock cmpxchgl %ecx, 8(%edi)	// try to take the spinlock
	jz	2f			// got it
	pause
	xorl	%eax, %eax
	lock cmpxchgl %ecx, 8(%edi)	// try 2nd time to take the spinlock
	jz	2f			// got it
	pause
	xorl	%eax, %eax
	lock cmpxchgl %ecx, 8(%edi)	// try 3rd time to take the spinlock
	jz	2f			// got it
	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue)
	jmp	1b			// loop to try again
2:
	movl	4(%edi),%ecx		// get ptr to last element in q
	testl	%ecx,%ecx		// q null?
	jnz	3f			// no
	movl	%esi,(%edi)		// q empty so this is first element
	jmp	4f
3:
	movl	%esi,(%edx,%ecx)	// point to new element from last
4:
	movl	%esi,4(%edi)		// new element becomes last in q
	movl	$0,8(%edi)		// unlock spinlock
	ret
COMMPAGE_DESCRIPTOR(pfz_enqueue,_COMM_PAGE_PFZ_ENQUEUE,0,0)
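/*
 * For clarity, the list manipulation performed above under the spinlock is
 * equivalent to this C sketch (illustrative only; "first", "last", and
 * "lock" stand for opaque1, opaque2, and opaque3, and "link" is the field
 * at "offset"):
 *
 *	new->link = NULL;		// zero forward link (done before locking)
 *	lock(&q->lock);			// the three cmpxchgl attempts above
 *	if (q->last == NULL)
 *		q->first = new;		// queue was empty
 *	else
 *		q->last->link = new;	// chain after old last element
 *	q->last = new;			// new element becomes the tail
 *	unlock(&q->lock);		// the final movl $0 store
 */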
/* Preemption-free-zone routine to FIFO Dequeue:
 *	%edi = ptr to queue head structure
 *	%edx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 *
 * Returns with next element (or 0) in %eax.
 */
COMMPAGE_FUNCTION_START(pfz_dequeue, 32, 4)
1:
	xorl	%eax, %eax
	orl	$-1, %ecx
	lock cmpxchgl %ecx, 8(%edi)	// try to take the spinlock
	jz	2f			// got it
	pause
	xorl	%eax, %eax
	lock cmpxchgl %ecx, 8(%edi)	// try 2nd time to take the spinlock
	jz	2f			// got it
	pause
	xorl	%eax, %eax
	lock cmpxchgl %ecx, 8(%edi)	// try 3rd time to take the spinlock
	jz	2f			// got it
	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue)
	jmp	1b			// loop to try again
2:
	movl	(%edi),%eax		// get ptr to first element in q
	testl	%eax,%eax		// q null?
	jz	4f			// yes
	movl	(%edx,%eax),%esi	// get ptr to 2nd element in q
	testl	%esi,%esi		// is there a 2nd element?
	jnz	3f			// yes
	movl	%esi,4(%edi)		// clear "last" field of q head
3:
	movl	%esi,(%edi)		// update "first" field of q head
4:
	movl	$0,8(%edi)		// unlock spinlock
	ret
COMMPAGE_DESCRIPTOR(pfz_dequeue,_COMM_PAGE_PFZ_DEQUEUE,0,0)


/************************* x86_64 versions follow **************************/


/*
 * typedef volatile struct {
 *	void	*opaque1;	<-- ptr to first queue element or null
 *	void	*opaque2;	<-- ptr to last queue element or null
 *	int	 opaque3;	<-- spinlock
 * } OSFifoQueueHead;
 *
 * void OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset);
 */

	// %rdi == list head, %rsi == new, %rdx == offset
COMMPAGE_FUNCTION_START(AtomicFifoEnqueue_64, 64, 4)
	pushq	%rbx
	xorl	%ebx,%ebx		// clear "preemption pending" flag
	COMMPAGE_CALL(_COMM_PAGE_PFZ_ENQUEUE,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue_64)
	testl	%ebx,%ebx		// pending preemption?
	jz	1f
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue_64)
1:
	popq	%rbx
	ret
COMMPAGE_DESCRIPTOR(AtomicFifoEnqueue_64,_COMM_PAGE_FIFO_ENQUEUE,0,0)


/* void* OSAtomicFifoDequeue( OSFifoQueueHead *list, size_t offset); */

	// %rdi == list head, %rsi == offset
COMMPAGE_FUNCTION_START(AtomicFifoDequeue_64, 64, 4)
	pushq	%rbx
	xorl	%ebx,%ebx		// clear "preemption pending" flag
	movq	%rsi,%rdx		// move offset to %rdx to be like the Enqueue case
	COMMPAGE_CALL(_COMM_PAGE_PFZ_DEQUEUE,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue_64)
	testl	%ebx,%ebx		// pending preemption?
	jz	1f
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue_64)
1:
	popq	%rbx
	ret				// ptr to 1st element in Q in %rax
COMMPAGE_DESCRIPTOR(AtomicFifoDequeue_64,_COMM_PAGE_FIFO_DEQUEUE,0,0)


/* Subroutine to make a preempt syscall.  Called when we notice %ebx is
 * nonzero after returning from a PFZ subroutine.  Not in PFZ.
 *
 * All registers preserved (but does clear the %ebx preemption flag).
 */
COMMPAGE_FUNCTION_START(preempt_64, 64, 4)
	pushq	%rax
	pushq	%rcx
	pushq	%r11
	movl	$(SYSCALL_CONSTRUCT_MACH(58)),%eax	/* 58 = pfz_exit */
	xorl	%ebx,%ebx		// clear "preemption pending" flag
	syscall
	popq	%r11
	popq	%rcx
	popq	%rax
	ret
COMMPAGE_DESCRIPTOR(preempt_64,_COMM_PAGE_PREEMPT,0,0)
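/*
 * The lockword protocol used by the PFZ routines and the backoff subroutines
 * is a simple compare-and-swap spinlock: 0 means free, -1 means held.  One
 * acquisition attempt is equivalent to this C sketch (illustrative only;
 * the GCC __sync builtin stands in for the lock cmpxchgl sequence):
 *
 *	static int trylock(volatile int *lockword) {
 *		// nonzero iff we swapped 0 -> -1 and now hold the lock
 *		return __sync_bool_compare_and_swap(lockword, 0, -1);
 *	}
 *
 * Each PFZ routine makes three inline attempts, then calls backoff to spin
 * (and take any pending preemption) outside the PFZ before retrying.
 */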
/* Subroutine to back off if we cannot get the spinlock.  Called
 * after a few attempts inline in the PFZ subroutines.  This code is
 * not in the PFZ.
 *	%rdi = ptr to queue head structure
 *	%ebx = preemption flag (nonzero if preemption pending)
 * Uses: %rax.
 */
COMMPAGE_FUNCTION_START(backoff_64, 64, 4)
	testl	%ebx,%ebx		// does kernel want to preempt us?
	jz	1f			// no
	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff_64)
1:
	pause				// SMT-friendly backoff
	cmpl	$0,16(%rdi)		// sniff the lockword
	jnz	1b			// loop if still taken
	ret				// lockword is free, so reenter PFZ
COMMPAGE_DESCRIPTOR(backoff_64,_COMM_PAGE_BACKOFF,0,0)


/* Preemption-free-zone routine to FIFO Enqueue:
 *	%rdi = ptr to queue head structure
 *	%rsi = ptr to new element to enqueue
 *	%rdx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 */
COMMPAGE_FUNCTION_START(pfz_enqueue_64, 64, 4)
	movq	$0,(%rdx,%rsi)		// zero forward link in new element
1:
	xorl	%eax, %eax
	orl	$-1, %ecx
	lock cmpxchgl %ecx,16(%rdi)	// try to take the spinlock
	jz	2f			// got it
	pause
	xorl	%eax, %eax
	lock cmpxchgl %ecx,16(%rdi)	// try 2nd time to take the spinlock
	jz	2f			// got it
	pause
	xorl	%eax, %eax
	lock cmpxchgl %ecx,16(%rdi)	// try 3rd time to take the spinlock
	jz	2f			// got it
	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue_64)
	jmp	1b			// loop to try again
2:
	movq	8(%rdi),%rcx		// get ptr to last element in q
	testq	%rcx,%rcx		// q null?
	jnz	3f			// no
	movq	%rsi,(%rdi)		// q empty so this is first element
	jmp	4f
3:
	movq	%rsi,(%rdx,%rcx)	// point to new element from last
4:
	movq	%rsi,8(%rdi)		// new element becomes last in q
	movl	$0,16(%rdi)		// unlock spinlock
	ret
COMMPAGE_DESCRIPTOR(pfz_enqueue_64,_COMM_PAGE_PFZ_ENQUEUE,0,0)


/* Preemption-free-zone routine to FIFO Dequeue:
 *	%rdi = ptr to queue head structure
 *	%rdx = offset of link field in elements
 *	%ebx = preemption flag (kernel sets nonzero if we should preempt)
 *
 * Returns with next element (or 0) in %rax.
 */
COMMPAGE_FUNCTION_START(pfz_dequeue_64, 64, 4)
1:
	xorl	%eax, %eax
	orl	$-1, %ecx
	lock cmpxchgl %ecx,16(%rdi)	// try to take the spinlock
	jz	2f			// got it
	pause
	xorl	%eax, %eax
	lock cmpxchgl %ecx,16(%rdi)	// try 2nd time to take the spinlock
	jz	2f			// got it
	pause
	xorl	%eax, %eax
	lock cmpxchgl %ecx,16(%rdi)	// try 3rd time to take the spinlock
	jz	2f			// got it
	COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue_64)
	jmp	1b			// loop to try again
2:
	movq	(%rdi),%rax		// get ptr to first element in q
	testq	%rax,%rax		// q null?
	jz	4f			// yes
	movq	(%rdx,%rax),%rsi	// get ptr to 2nd element in q
	testq	%rsi,%rsi		// is there a 2nd element?
	jnz	3f			// yes
	movq	%rsi,8(%rdi)		// no - clear "last" field of q head
3:
	movq	%rsi,(%rdi)		// update "first" field of q head
4:
	movl	$0,16(%rdi)		// unlock spinlock
	ret
COMMPAGE_DESCRIPTOR(pfz_dequeue_64,_COMM_PAGE_PFZ_DEQUEUE,0,0)
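/*
 * As with enqueue, the dequeue list manipulation done under the spinlock is
 * equivalent to this C sketch (illustrative only; "first", "last", and
 * "lock" stand for opaque1, opaque2, and opaque3):
 *
 *	lock(&q->lock);			// the three cmpxchgl attempts above
 *	elem = q->first;		// NULL if the queue is empty
 *	if (elem != NULL) {
 *		q->first = elem->link;	// 2nd element (or NULL) is new head
 *		if (elem->link == NULL)
 *			q->last = NULL;	// queue is now empty
 *	}
 *	unlock(&q->lock);		// the final movl $0 store
 *	return elem;
 */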