X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/e5568f75972dfc723778653c11cb6b4dc825716a..7ddcb079202367355dddccdfa4318e57d50318be:/osfmk/i386/machine_routines_asm.s
diff --git a/osfmk/i386/machine_routines_asm.s b/osfmk/i386/machine_routines_asm.s
index eba45c21c..0e3d9fb68 100644
--- a/osfmk/i386/machine_routines_asm.s
+++ b/osfmk/i386/machine_routines_asm.s
@@ -1,25 +1,38 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
 *
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
 *
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
+
 #include
+#include
+#include
+#include
+#include
+#include
+#include

 /*
 ** ml_get_timebase()
@@ -31,262 +44,268 @@
 */
 ENTRY(ml_get_timebase)

-	movl	S_ARG0, %ecx
-
-	rdtsc
-
-	movl	%edx, 0(%ecx)
-	movl	%eax, 4(%ecx)
-
-	ret
-
+	movl	S_ARG0, %ecx
+
+	lfence
+	rdtsc
+	lfence
+
+	movl	%edx, 0(%ecx)
+	movl	%eax, 4(%ecx)
+
+	ret
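
The change above brackets rdtsc with lfence so the timestamp can be neither hoisted nor delayed by out-of-order execution. In C terms the new body behaves roughly like the sketch below; read_tsc_serialized() is an illustrative name, not a kernel symbol, and GCC-style inline assembly is assumed. Note that the assembly routine stores the high word (%edx) at offset 0 of the caller's structure and the low word (%eax) at offset 4.

    #include <stdint.h>

    /* Illustrative equivalent of the lfence/rdtsc/lfence sequence:
     * the fences keep rdtsc from being reordered against surrounding
     * instructions, so the timestamp is taken where the caller
     * expects it to be taken. */
    static inline uint64_t read_tsc_serialized(void)
    {
        uint32_t lo, hi;
        __asm__ volatile("lfence; rdtsc; lfence" : "=a" (lo), "=d" (hi));
        return ((uint64_t)hi << 32) | lo;
    }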
-/* PCI config cycle probing
+/*
+ * Convert between various timer units
 *
- * boolean_t ml_probe_read(vm_offset_t paddr, unsigned int *val)
+ * uint64_t tmrCvt(uint64_t time, uint64_t conversion)
 *
- * Read the memory location at physical address paddr.
- * This is a part of a device probe, so there is a good chance we will
- * have a machine check here. So we have to be able to handle that.
- * We assume that machine checks are enabled both in MSR and HIDs
- */
-ENTRY(ml_probe_read)
-
-	movl	S_ARG0, %ecx
-	movl	S_ARG1, %eax
-	movl	0(%ecx), %ecx
-	movl	%ecx, 0(%eax)
-	movl	$1, %eax
-
-	ret
-
-
-/* PCI config cycle probing - 64-bit
+ * This code converts 64-bit time units to other units.
+ * For example, the TSC is converted to HPET units.
 *
- * boolean_t ml_probe_read_64(addr64_t paddr, unsigned int *val)
+ * Time is a 64-bit integer that is some number of ticks.
+ * The conversion factor is a 64-bit fixed-point number with a
+ * 32-bit integer part and a 32-bit fraction.
 *
- * Read the memory location at physical address paddr.
- * This is a part of a device probe, so there is a good chance we will
- * have a machine check here. So we have to be able to handle that.
- * We assume that machine checks are enabled both in MSR and HIDs
- */
-ENTRY(ml_probe_read_64)
-
-	/* Only use lower 32 bits of address for now */
-	movl	S_ARG0, %ecx
-	movl	S_ARG2, %eax
-	movl	0(%ecx), %ecx
-	movl	%ecx, 0(%eax)
-	movl	$1, %eax
-
-	ret
-
-
-/* Read physical address byte
+ * The tick count is multiplied by the conversion factor to form a
+ * 128-bit product, and the top and bottom 32-bit words are dropped:
+ * the top word is overflow and the bottom word is the fraction part
+ * of the result.
+ *
+ * We return a 64-bit value.
 *
- * unsigned int ml_phys_read_byte(vm_offset_t paddr)
- * unsigned int ml_phys_read_byte_64(addr64_t paddr)
+ * Note that we can use this function to multiply two conversion factors.
+ * We do this in order to calculate the multiplier used to convert
+ * directly between any two units.
 *
- * Read the byte at physical address paddr. Memory should not be cache inhibited.
 */
-ENTRY(ml_phys_read_byte_64)
 
-	/* Only use lower 32 bits of address for now */
-	movl	S_ARG0, %ecx
-	xor	%eax, %eax
-	movb	0(%ecx), %eax
+	.globl	EXT(tmrCvt)
+	.align	FALIGN
+
+LEXT(tmrCvt)
+
+	pushl	%ebp		// Save a non-volatile (callee-saved) register
+	movl	%esp,%ebp	// Set up the frame pointer; args start at 8(%ebp)
+	pushl	%ebx		// Save a non-volatile register
+	pushl	%esi		// Save a non-volatile register
+	pushl	%edi		// Save a non-volatile register
+
+// %ebp + 8 - low-order ts
+// %ebp + 12 - high-order ts
+// %ebp + 16 - low-order cvt
+// %ebp + 20 - high-order cvt
+
+	movl	8(%ebp),%eax	// Get low-order ts
+	mull	16(%ebp)	// Multiply by low-order conversion
+	movl	%edx,%edi	// Need to save only the high order part
+
+	movl	12(%ebp),%eax	// Get the high-order ts
+	mull	16(%ebp)	// Multiply by low-order conversion
+	addl	%eax,%edi	// Add in the overflow from the low x low calculation
+	adcl	$0,%edx		// Propagate any carry into the high word
+	movl	%edx,%esi	// Save the high word
+
+// We now have the upper 64 bits of the 96 bit multiply of ts and the low half of cvt
+// in %esi:%edi
+
+	movl	8(%ebp),%eax	// Get low-order ts
+	mull	20(%ebp)	// Multiply by high-order conversion
+	movl	%eax,%ebx	// Need to save the low order part
+	movl	%edx,%ecx	// Need to save the high order part
+
+	movl	12(%ebp),%eax	// Get the high-order ts
+	mull	20(%ebp)	// Multiply by high-order conversion
+
+// Now %ecx:%ebx holds ts-low times cvt-high, and %edx:%eax holds ts-high times cvt-high
+// We don't care about the highest word since it is overflow
+
+	addl	%edi,%ebx	// Add the low words
+	adcl	%ecx,%esi	// Add in the high plus carry from low
+	addl	%eax,%esi	// Add in the rest of the high
+
+	movl	%ebx,%eax	// Pass back low word
+	movl	%esi,%edx	// and the high word
+
+	popl	%edi		// Restore a non-volatile register
+	popl	%esi		// Restore a non-volatile register
+	popl	%ebx		// Restore a non-volatile register
+	popl	%ebp		// Restore a non-volatile register
+
+	ret			// Leave...
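
The word-by-word mull/addl sequence above computes the middle 64 bits of a 64x64-bit multiply. Assuming a compiler that provides unsigned __int128, the whole routine reduces to the C sketch below (tmr_cvt_sketch is an illustrative name, not the kernel symbol):

    #include <stdint.h>

    /* Multiply a 64-bit tick count by a 32.32 fixed-point conversion
     * factor and keep the middle 64 bits of the 128-bit product: the
     * top 32 bits are overflow and the bottom 32 bits are the
     * discarded fraction, exactly as the assembly computes word by
     * word. */
    static uint64_t tmr_cvt_sketch(uint64_t time, uint64_t conversion)
    {
        unsigned __int128 product = (unsigned __int128)time * conversion;
        return (uint64_t)(product >> 32);
    }

Passing one conversion factor as the time argument multiplies two factors together, which is how a direct unit-to-unit multiplier can be precomputed, as the comment above notes.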
+
+
+/* void _rtc_nanotime_adjust(
+		uint64_t	tsc_base_delta,
+		rtc_nanotime_t	*dst);
+*/
+	.globl	EXT(_rtc_nanotime_adjust)
+	.align	FALIGN

-	ret
 
+LEXT(_rtc_nanotime_adjust)
+	mov	12(%esp),%edx			/* ptr to rtc_nanotime_info */
+
+	movl	RNT_GENERATION(%edx),%ecx	/* get current generation */
+	movl	$0,RNT_GENERATION(%edx)		/* flag data as being updated */
-ENTRY(ml_phys_read_byte)
+	movl	4(%esp),%eax			/* get lower 32-bits of delta */
+	addl	%eax,RNT_TSC_BASE(%edx)
+	adcl	$0,RNT_TSC_BASE+4(%edx)		/* propagate carry */
-	movl	S_ARG0, %ecx
-	xor	%eax, %eax
-	movb	0(%ecx), %eax
+	incl	%ecx				/* next generation */
+	jnz	1f
+	incl	%ecx				/* skip 0, which is a flag */
+1:	movl	%ecx,RNT_GENERATION(%edx)	/* update generation and make usable */
 
 	ret
 
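
The generation field acts as a sequence count: readers treat zero as "update in progress" and retry when the value changes. A rough C sketch of the writer side follows; the struct and names are illustrative stand-ins for the RNT_* assym offsets, and the lock-free publication relies on x86's ordered stores (a portable version would need explicit atomics or barriers).

    #include <stdint.h>

    struct rtc_nanotime_sketch {
        volatile uint32_t generation;   /* 0 means update in progress */
        uint64_t          tsc_base;
        uint64_t          ns_base;
    };

    /* Writer side, mirroring _rtc_nanotime_adjust() above. */
    static void rtc_nanotime_adjust_sketch(struct rtc_nanotime_sketch *r,
                                           uint64_t tsc_base_delta)
    {
        uint32_t gen = r->generation;

        r->generation = 0;                        /* readers now spin */
        r->tsc_base += (uint32_t)tsc_base_delta;  /* the asm adds only the
                                                     low 32 bits, with
                                                     carry propagation */
        if (++gen == 0)
            ++gen;                                /* skip 0, the flag */
        r->generation = gen;                      /* publish */
    }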
-/* Read physical address half word
+/* uint64_t _rtc_nanotime_read( rtc_nanotime_t *rntp, int slow );
 *
- * unsigned int ml_phys_read_half(vm_offset_t paddr)
- * unsigned int ml_phys_read_half_64(addr64_t paddr)
+ * This is the same as the commpage nanotime routine, except that it uses the
+ * kernel internal "rtc_nanotime_info" data instead of the commpage data. The two copies
+ * of data (one in the kernel and one in user space) are kept in sync by rtc_clock_napped().
 *
- * Read the half word at physical address paddr. Memory should not be cache inhibited.
- */
-ENTRY(ml_phys_read_half_64)
-
-	/* Only use lower 32 bits of address for now */
-	movl	S_ARG0, %ecx
-	xor	%eax, %eax
-	movw	0(%ecx), %eax
-
-	ret
-
-ENTRY(ml_phys_read_half)
-
-	movl	S_ARG0, %ecx
-	xor	%eax, %eax
-	movw	0(%ecx), %eax
-
-	ret
-
-
-/* Read physical address word
+ * Warning! There is another copy of this code in osfmk/i386/locore.s. The
+ * two versions must be kept in sync with each other!
 *
- * unsigned int ml_phys_read(vm_offset_t paddr)
- * unsigned int ml_phys_read_64(addr64_t paddr)
- * unsigned int ml_phys_read_word(vm_offset_t paddr)
- * unsigned int ml_phys_read_word_64(addr64_t paddr)
+ * There are actually two versions of the algorithm, one each for "slow" and "fast"
+ * processors. The more common "fast" algorithm is:
 *
- * Read the word at physical address paddr. Memory should not be cache inhibited.
- */
-ENTRY(ml_phys_read_64)
-ENTRY(ml_phys_read_word_64)
-
-	/* Only use lower 32 bits of address for now */
-	movl	S_ARG0, %ecx
-	movl	0(%ecx), %eax
-
-	ret
-
-ENTRY(ml_phys_read)
-ENTRY(ml_phys_read_word)
-
-	movl	S_ARG0, %ecx
-	movl	0(%ecx), %eax
-
-	ret
-
-
-/* Read physical address double
+ * nanoseconds = (((rdtsc - rnt_tsc_base) * rnt_tsc_scale) / 2**32) + rnt_ns_base;
 *
- * unsigned long long ml_phys_read_double(vm_offset_t paddr)
- * unsigned long long ml_phys_read_double_64(addr64_t paddr)
+ * Of course, the divide by 2**32 is a nop. rnt_tsc_scale is a constant computed during initialization:
 *
- * Read the double word at physical address paddr. Memory should not be cache inhibited.
- */
-ENTRY(ml_phys_read_double_64)
-
-	/* Only use lower 32 bits of address for now */
-	movl	S_ARG0, %ecx
-	movl	0(%ecx), %eax
-	movl	4(%ecx), %edx
-
-	ret
-
-ENTRY(ml_phys_read_double)
-
-	movl	S_ARG0, %ecx
-	movl	0(%ecx), %eax
-	movl	4(%ecx), %edx
-
-	ret
-
-
-/* Write physical address byte
+ * rnt_tsc_scale = (10^9 * 2**32) / tscFreq;
 *
- * void ml_phys_write_byte(vm_offset_t paddr, unsigned int data)
- * void ml_phys_write_byte_64(addr64_t paddr, unsigned int data)
+ * The "slow" algorithm uses long division:
 *
- * Write the byte at physical address paddr. Memory should not be cache inhibited.
- */
-ENTRY(ml_phys_write_byte_64)
-
-	/* Only use lower 32 bits of address for now */
-	movl	S_ARG0, %ecx
-	movl	S_ARG2, %eax
-	movb	%eax, 0(%ecx)
-
-	ret
-
-ENTRY(ml_phys_write_byte)
-
-	movl	S_ARG0, %ecx
-	movl	S_ARG1, %eax
-	movb	%eax, 0(%ecx)
-
-	ret
-
-
-/* Write physical address half word
+ * nanoseconds = (((rdtsc - rnt_tsc_base) * 10^9) / tscFreq) + rnt_ns_base;
 *
- * void ml_phys_write_half(vm_offset_t paddr, unsigned int data)
- * void ml_phys_write_half_64(addr64_t paddr, unsigned int data)
+ * Since this routine is not synchronized and can be called in any context,
+ * we use a generation count to guard against seeing partially updated data. In addition,
+ * the update routines (_rtc_nanotime_store(), and _rtc_nanotime_adjust() just above) zero
+ * the generation before updating the data, and store the nonzero generation only after
+ * all other data has been stored. Because IA32 guarantees that stores by one processor
+ * must be seen in order by another, we can avoid using a lock. We spin while the
+ * generation is zero.
 *
- * Write the byte at physical address paddr. Memory should not be cache inhibited.
+ * In accordance with the ABI, we return the 64-bit nanotime in %edx:%eax.
 */
-ENTRY(ml_phys_write_half_64)
-
-	/* Only use lower 32 bits of address for now */
-	movl	S_ARG0, %ecx
-	movl	S_ARG2, %eax
-	movw	%eax, 0(%ecx)
+
+	.globl	EXT(_rtc_nanotime_read)
+	.align	FALIGN
+LEXT(_rtc_nanotime_read)
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+	movl	8(%ebp),%edi	/* get ptr to rtc_nanotime_info */
+	movl	12(%ebp),%eax	/* get "slow" flag */
+	testl	%eax,%eax
+	jnz	Lslow
+
+	/* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD */
+	PAL_RTC_NANOTIME_READ_FAST()
+
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	popl	%ebp
+	ret
+
+	/* Processor whose TSC frequency is slower than or equal to SLOW_TSC_THRESHOLD */
+Lslow:
+	movl	RNT_GENERATION(%edi),%esi	/* get generation (0 if being changed) */
+	testl	%esi,%esi			/* if being changed, loop until stable */
+	jz	Lslow
+	pushl	%esi				/* save generation */
+	pushl	RNT_SHIFT(%edi)			/* save low 32 bits of tscFreq */
+
+	lfence
+	rdtsc					/* get TSC in %edx:%eax */
+	lfence
+	subl	RNT_TSC_BASE(%edi),%eax
+	sbbl	RNT_TSC_BASE+4(%edi),%edx
+
+	/*
+	 * Do the math to convert tsc ticks to nanoseconds. We first
+	 * do a long multiply of 1 billion times the tsc. Then we do
+	 * a long division by the tsc frequency.
+	 */
+	mov	$1000000000, %ecx	/* number of nanoseconds in a second */
+	mov	%edx, %ebx
+	mul	%ecx
+	mov	%edx, %edi
+	mov	%eax, %esi
+	mov	%ebx, %eax
+	mul	%ecx
+	add	%edi, %eax
+	adc	$0, %edx		/* result in edx:eax:esi */
+	mov	%eax, %edi
+	popl	%ecx			/* get low 32 bits of tscFreq */
+	xor	%eax, %eax
+	xchg	%edx, %eax
+	div	%ecx
+	xor	%eax, %eax
+	mov	%edi, %eax
+	div	%ecx
+	mov	%eax, %ebx
+	mov	%esi, %eax
+	div	%ecx
+	mov	%ebx, %edx		/* result in edx:eax */
+
+	movl	8(%ebp),%edi		/* recover ptr to rtc_nanotime_info */
+	popl	%esi			/* recover generation */
+
+	addl	RNT_NS_BASE(%edi),%eax
+	adcl	RNT_NS_BASE+4(%edi),%edx
+
+	cmpl	RNT_GENERATION(%edi),%esi	/* have the parameters changed?
*/ + jne Lslow /* yes, loop until stable */ + + pop %ebx + pop %edi + pop %esi + pop %ebp + ret /* result in edx:eax */ - ret - -ENTRY(ml_phys_write_half) - - movl S_ARG0, %ecx - movl S_ARG1, %eax - movw %eax, 0(%ecx) - - ret -/* Write physical address word - * - * void ml_phys_write(vm_offset_t paddr, unsigned int data) - * void ml_phys_write_64(addr64_t paddr, unsigned int data) - * void ml_phys_write_word(vm_offset_t paddr, unsigned int data) - * void ml_phys_write_word_64(addr64_t paddr, unsigned int data) - * - * Write the word at physical address paddr. Memory should not be cache inhibited. +/* + * Timing routines. */ -ENTRY(ml_phys_write_64) -ENTRY(ml_phys_write_word_64) - - /* Only use lower 32 bits of address for now */ - movl S_ARG0, %ecx - movl S_ARG2, %eax - movl %eax, 0(%ecx) - +Entry(timer_update) + movl 4(%esp),%ecx + movl 8(%esp),%eax + movl 12(%esp),%edx + movl %eax,TIMER_HIGHCHK(%ecx) + movl %edx,TIMER_LOW(%ecx) + movl %eax,TIMER_HIGH(%ecx) ret -ENTRY(ml_phys_write) -ENTRY(ml_phys_write_word) - - movl S_ARG0, %ecx - movl S_ARG1, %eax - movl %eax, 0(%ecx) - +Entry(timer_grab) + movl 4(%esp),%ecx +0: movl TIMER_HIGH(%ecx),%edx + movl TIMER_LOW(%ecx),%eax + cmpl TIMER_HIGHCHK(%ecx),%edx + jne 0b ret -/* Write physical address double word - * - * void ml_phys_write_double(vm_offset_t paddr, unsigned long long data) - * void ml_phys_write_double_64(addr64_t paddr, unsigned long long data) - * - * Write the double word at physical address paddr. Memory should not be cache inhibited. - */ -ENTRY(ml_phys_write_double_64) - - /* Only use lower 32 bits of address for now */ - movl S_ARG0, %ecx - movl S_ARG2, %eax - movl %eax, 0(%ecx) - movl S_ARG3, %eax - movl %eax, 4(%ecx) - - ret +Entry(call_continuation) + movl S_ARG0,%eax /* get continuation */ + movl S_ARG1,%edx /* continuation param */ + movl S_ARG2,%ecx /* wait result */ + movl %gs:CPU_KERNEL_STACK,%esp /* pop the stack */ + xorl %ebp,%ebp /* zero frame pointer */ + subl $8,%esp /* align the stack */ + pushl %ecx + pushl %edx + call *%eax /* call continuation */ + addl $16,%esp + movl %gs:CPU_ACTIVE_THREAD,%eax + pushl %eax + call EXT(thread_terminate) -ENTRY(ml_phys_write_double) - movl S_ARG0, %ecx - movl S_ARG1, %eax - movl %eax, 0(%ecx) - movl S_ARG2, %eax - movl %eax, 4(%ecx) - - ret
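
For reference, the Lslow path of _rtc_nanotime_read() above (spin while the generation is zero, long multiply by 10^9, long divide by the TSC frequency, retry if the generation moved) can be sketched in C as below, reusing the illustrative struct and helper from the earlier sketches; tsc_freq stands in for the low 32 bits of the TSC frequency stashed in RNT_SHIFT, and again the real code relies on x86 store ordering rather than portable barriers.

    #include <stdint.h>

    /* Reader side of the generation protocol, slow-processor variant. */
    static uint64_t rtc_nanotime_read_slow_sketch(struct rtc_nanotime_sketch *r,
                                                  uint32_t tsc_freq)
    {
        uint32_t gen;
        uint64_t ns;

        do {
            while ((gen = r->generation) == 0)
                ;                               /* update in progress */
            unsigned __int128 elapsed =
                read_tsc_serialized() - r->tsc_base;
            ns = (uint64_t)((elapsed * 1000000000u) / tsc_freq)
                 + r->ns_base;
        } while (gen != r->generation);         /* parameters changed? */

        return ns;
    }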