/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
*
- * @APPLE_LICENSE_HEADER_START@
- *
- * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
+
#include <i386/asm.h>
+#include <i386/apic.h>
+#include <i386/eflags.h>
+#include <i386/rtclock_asm.h>
+#include <i386/postcode.h>
+#include <i386/proc_reg.h>
+#include <assym.s>
/*
** ml_get_timebase()
*/
ENTRY(ml_get_timebase)
- movl S_ARG0, %ecx
-
- rdtsc
-
- movl %edx, 0(%ecx)
- movl %eax, 4(%ecx)
-
- ret
-
+ movl S_ARG0, %ecx
+
+ lfence
+ rdtsc
+ lfence
+
+ movl %edx, 0(%ecx)
+ movl %eax, 4(%ecx)
+
+ ret
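+
+/*
+ * Roughly equivalent C for the routine above -- a sketch only, not the
+ * build source. The lfence pair keeps rdtsc from being reordered with
+ * surrounding loads; note that the stores place the TSC high word at
+ * the low address. The prototype is assumed from machine_routines.h:
+ *
+ *	void ml_get_timebase(unsigned long long *timestamp)
+ *	{
+ *		uint32_t lo, hi;
+ *		__asm__ volatile("lfence; rdtsc; lfence"
+ *				 : "=a" (lo), "=d" (hi));
+ *		((uint32_t *)timestamp)[0] = hi;	// %edx, offset 0
+ *		((uint32_t *)timestamp)[1] = lo;	// %eax, offset 4
+ *	}
+ */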
-/* PCI config cycle probing
+/*
+ * Convert between various timer units
*
- * boolean_t ml_probe_read(vm_offset_t paddr, unsigned int *val)
+ * uint64_t tmrCvt(uint64_t time, uint64_t conversion)
*
- * Read the memory location at physical address paddr.
- * This is a part of a device probe, so there is a good chance we will
- * have a machine check here. So we have to be able to handle that.
- * We assume that machine checks are enabled both in MSR and HIDs
- */
-ENTRY(ml_probe_read)
-
- movl S_ARG0, %ecx
- movl S_ARG1, %eax
- movl 0(%ecx), %ecx
- movl %ecx, 0(%eax)
- movl $1, %eax
-
- ret
-
-
-/* PCI config cycle probing - 64-bit
+ * This code converts 64-bit time units to other units.
+ * For example, the TSC is converted to HPET units.
*
- * boolean_t ml_probe_read_64(addr64_t paddr, unsigned int *val)
+ * Time is a 64-bit integer that is some number of ticks.
+ * The conversion factor is a 64-bit fixed-point number composed
+ * of a 32-bit integer part and a 32-bit fraction.
*
- * Read the memory location at physical address paddr.
- * This is a part of a device probe, so there is a good chance we will
- * have a machine check here. So we have to be able to handle that.
- * We assume that machine checks are enabled both in MSR and HIDs
- */
-ENTRY(ml_probe_read_64)
-
- /* Only use lower 32 bits of address for now */
- movl S_ARG0, %ecx
- movl S_ARG2, %eax
- movl 0(%ecx), %ecx
- movl %ecx, 0(%eax)
- movl $1, %eax
-
- ret
-
-
-/* Read physical address byte
+ * The time ticks are multiplied by the conversion factor. The
+ * full product is 128 bits wide; the high and low 32-bit words
+ * are dropped, since the high word is overflow and the low word
+ * is the fraction part of the result.
+ *
+ * We return the middle 64 bits.
*
- * unsigned int ml_phys_read_byte(vm_offset_t paddr)
- * unsigned int ml_phys_read_byte_64(addr64_t paddr)
+ * Note that we can use this function to multiply 2 conversion factors.
+ * We do this in order to calculate the multiplier used to convert
+ * directly between any two units.
*
- * Read the byte at physical address paddr. Memory should not be cache inhibited.
*/
-ENTRY(ml_phys_read_byte_64)
- /* Only use lower 32 bits of address for now */
- movl S_ARG0, %ecx
- xor %eax, %eax
- movb 0(%ecx), %eax
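+
+/*
+ * A C sketch of the arithmetic (illustrative only; i386 has no native
+ * 128-bit type, which is why the multiply below is done in 32-bit
+ * pieces, but the compiler extension shows the intent):
+ *
+ *	uint64_t tmrCvt(uint64_t time, uint64_t conversion)
+ *	{
+ *		// 64 x 64 -> 128-bit product; keep the middle 64 bits by
+ *		// shifting out the fraction and truncating the overflow.
+ *		return (uint64_t)(((unsigned __int128)time * conversion) >> 32);
+ *	}
+ */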
+ .globl EXT(tmrCvt)
+ .align FALIGN
+
+LEXT(tmrCvt)
+
+ pushl %ebp // Save a callee-saved register
+ movl %esp,%ebp // Set up frame; parameters start at 8(%ebp)
+ pushl %ebx // Save a callee-saved register
+ pushl %esi // Save a callee-saved register
+ pushl %edi // Save a callee-saved register
+
+// %ebp + 8 - low-order ts
+// %ebp + 12 - high-order ts
+// %ebp + 16 - low-order cvt
+// %ebp + 20 - high-order cvt
+
+ movl 8(%ebp),%eax // Get low-order ts
+ mull 16(%ebp) // Multiply by low-order conversion
+ movl %edx,%edi // Need to save only the high order part
+
+ movl 12(%ebp),%eax // Get the high-order ts
+ mull 16(%ebp) // Multiply by low-order conversion
+ addl %eax,%edi // Add the overflow word from the low x low multiply
+ adcl $0,%edx // Propagate the carry into the high word
+ movl %edx,%esi // Save the high word of the partial sum
+
+// We now have the upper 64 bits of the 96 bit multiply of ts and the low half of cvt
+// in %esi:%edi
+
+ movl 8(%ebp),%eax // Get low-order ts
+ mull 20(%ebp) // Multiply by high-order conversion
+ movl %eax,%ebx // Need to save the low order part
+ movl %edx,%ecx // Need to save the high order part
+
+ movl 12(%ebp),%eax // Get the high-order ts
+ mull 20(%ebp) // Multiply by high-order conversion
+
+// Now have %ecx:%ebx as the full low x high product and %edx:%eax as the
+// high x high product. The highest word (%edx) is pure overflow, so we
+// don't care about it.
+
+ addl %edi,%ebx // Add the low words
+ adcl %ecx,%esi // Add in the high plus carry from low
+ addl %eax,%esi // Add in the rest of the high
+
+ movl %ebx,%eax // Pass back low word
+ movl %esi,%edx // and the high word
+
+ popl %edi // Restore a callee-saved register
+ popl %esi // Restore a callee-saved register
+ popl %ebx // Restore a callee-saved register
+ popl %ebp // Restore a callee-saved register
+
+ ret // Leave...
+
+
+/*
+ * void _rtc_nanotime_adjust(
+ *		uint64_t tsc_base_delta,
+ *		rtc_nanotime_t *dst);
+ */
+ .globl EXT(_rtc_nanotime_adjust)
+ .align FALIGN
- ret
+LEXT(_rtc_nanotime_adjust)
+ mov 12(%esp),%edx /* ptr to rtc_nanotime_info */
+
+ movl RNT_GENERATION(%edx),%ecx /* get current generation */
+ movl $0,RNT_GENERATION(%edx) /* flag data as being updated */
-ENTRY(ml_phys_read_byte)
+ movl 4(%esp),%eax /* get lower 32-bits of delta */
+ addl %eax,RNT_TSC_BASE(%edx)
+ adcl $0,RNT_TSC_BASE+4(%edx) /* propagate carry */
- movl S_ARG0, %ecx
- xor %eax, %eax
- movb 0(%ecx), %eax
+ incl %ecx /* next generation */
+ jnz 1f
+ incl %ecx /* skip 0, which is a flag */
+1: movl %ecx,RNT_GENERATION(%edx) /* update generation and make usable */
ret
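+
+/*
+ * The generation protocol used above, as a C sketch (field names are
+ * illustrative; the real offsets come from assym.s). Readers spin while
+ * the generation is 0, so the writer flags the update in progress,
+ * changes the data, then publishes a new nonzero generation:
+ *
+ *	void _rtc_nanotime_adjust(uint64_t tsc_base_delta, rtc_nanotime_t *dst)
+ *	{
+ *		uint32_t next_gen = dst->generation;
+ *
+ *		dst->generation = 0;		// flag data as being updated
+ *		dst->tsc_base += tsc_base_delta;
+ *		if (++next_gen == 0)		// skip 0, which is the flag
+ *			next_gen++;
+ *		dst->generation = next_gen;	// make data usable again
+ *	}
+ */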
-/* Read physical address half word
+/* uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp, int slow);
*
- * unsigned int ml_phys_read_half(vm_offset_t paddr)
- * unsigned int ml_phys_read_half_64(addr64_t paddr)
+ * This is the same as the commpage nanotime routine, except that it uses the
+ * kernel internal "rtc_nanotime_info" data instead of the commpage data. The two copies
+ * of data (one in the kernel and one in user space) are kept in sync by rtc_clock_napped().
*
- * Read the half word at physical address paddr. Memory should not be cache inhibited.
- */
-ENTRY(ml_phys_read_half_64)
-
- /* Only use lower 32 bits of address for now */
- movl S_ARG0, %ecx
- xor %eax, %eax
- movw 0(%ecx), %eax
-
- ret
-
-ENTRY(ml_phys_read_half)
-
- movl S_ARG0, %ecx
- xor %eax, %eax
- movw 0(%ecx), %eax
-
- ret
-
-
-/* Read physical address word
+ * Warning! There is another copy of this code in osfmk/i386/locore.s. The
+ * two versions must be kept in sync with each other!
*
- * unsigned int ml_phys_read(vm_offset_t paddr)
- * unsigned int ml_phys_read_64(addr64_t paddr)
- * unsigned int ml_phys_read_word(vm_offset_t paddr)
- * unsigned int ml_phys_read_word_64(addr64_t paddr)
+ * There are actually two versions of the algorithm, one each for "slow" and "fast"
+ * processors. The more common "fast" algorithm is:
*
- * Read the word at physical address paddr. Memory should not be cache inhibited.
- */
-ENTRY(ml_phys_read_64)
-ENTRY(ml_phys_read_word_64)
-
- /* Only use lower 32 bits of address for now */
- movl S_ARG0, %ecx
- movl 0(%ecx), %eax
-
- ret
-
-ENTRY(ml_phys_read)
-ENTRY(ml_phys_read_word)
-
- movl S_ARG0, %ecx
- movl 0(%ecx), %eax
-
- ret
-
-
-/* Read physical address double
+ * nanoseconds = (((rdtsc - rnt_tsc_base) * rnt_tsc_scale) / 2**32) + rnt_ns_base;
*
- * unsigned long long ml_phys_read_double(vm_offset_t paddr)
- * unsigned long long ml_phys_read_double_64(addr64_t paddr)
+ * Of course, the divide by 2**32 is a nop. rnt_tsc_scale is a constant computed during initialization:
*
- * Read the double word at physical address paddr. Memory should not be cache inhibited.
- */
-ENTRY(ml_phys_read_double_64)
-
- /* Only use lower 32 bits of address for now */
- movl S_ARG0, %ecx
- movl 0(%ecx), %eax
- movl 4(%ecx), %edx
-
- ret
-
-ENTRY(ml_phys_read_double)
-
- movl S_ARG0, %ecx
- movl 0(%ecx), %eax
- movl 4(%ecx), %edx
-
- ret
-
-
-/* Write physical address byte
+ * rnt_tsc_scale = (10**9 * 2**32) / tscFreq;
*
- * void ml_phys_write_byte(vm_offset_t paddr, unsigned int data)
- * void ml_phys_write_byte_64(addr64_t paddr, unsigned int data)
+ * The "slow" algorithm uses long division:
*
- * Write the byte at physical address paddr. Memory should not be cache inhibited.
- */
-ENTRY(ml_phys_write_byte_64)
-
- /* Only use lower 32 bits of address for now */
- movl S_ARG0, %ecx
- movl S_ARG2, %eax
- movb %eax, 0(%ecx)
-
- ret
-
-ENTRY(ml_phys_write_byte)
-
- movl S_ARG0, %ecx
- movl S_ARG1, %eax
- movb %eax, 0(%ecx)
-
- ret
-
-
-/* Write physical address half word
+ * nanoseconds = (((rdtsc - rnt_tsc_base) * 10**9) / tscFreq) + rnt_ns_base;
*
- * void ml_phys_write_half(vm_offset_t paddr, unsigned int data)
- * void ml_phys_write_half_64(addr64_t paddr, unsigned int data)
+ * Since this routine is not synchronized and can be called in any context,
+ * we use a generation count to guard against seeing partially updated data. In addition,
+ * the _rtc_nanotime_store() routine -- just above -- zeroes the generation before
+ * updating the data, and stores the nonzero generation only after all other data has been
+ * stored. Because IA32 guarantees that stores by one processor must be seen in order
+ * by another, we can avoid using a lock. We spin while the generation is zero.
*
- * Write the byte at physical address paddr. Memory should not be cache inhibited.
+ * In accordance with the ABI, we return the 64-bit nanotime in %edx:%eax.
*/
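+
+/*
+ * The fast algorithm above, as a C sketch (field names are illustrative;
+ * the real offsets come from assym.s, and rdtsc64() stands in for the
+ * lfence/rdtsc/lfence sequence):
+ *
+ *	uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp)
+ *	{
+ *		uint32_t gen;
+ *		uint64_t ns;
+ *
+ *		do {
+ *			while ((gen = rntp->generation) == 0)
+ *				;			// update in progress, spin
+ *			uint64_t tsc = rdtsc64() - rntp->tsc_base;
+ *			ns = (uint64_t)(((unsigned __int128)tsc * rntp->scale) >> 32)
+ *			    + rntp->ns_base;
+ *		} while (gen != rntp->generation);	// parameters changed, retry
+ *		return ns;
+ *	}
+ */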
-ENTRY(ml_phys_write_half_64)
-
- /* Only use lower 32 bits of address for now */
- movl S_ARG0, %ecx
- movl S_ARG2, %eax
- movw %eax, 0(%ecx)
+
+ .globl EXT(_rtc_nanotime_read)
+ .align FALIGN
+LEXT(_rtc_nanotime_read)
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %edi
+ pushl %ebx
+ movl 8(%ebp),%edi /* get ptr to rtc_nanotime_info */
+ movl 12(%ebp),%eax /* get "slow" flag */
+ testl %eax,%eax
+ jnz Lslow
+
+ /* Fast path: processors whose TSC frequency is above SLOW_TSC_THRESHOLD */
+ PAL_RTC_NANOTIME_READ_FAST()
+
+ popl %ebx
+ popl %edi
+ popl %esi
+ popl %ebp
+ ret
+
+ /* Slow path: processors whose TSC frequency is at or below SLOW_TSC_THRESHOLD */
+Lslow:
+ movl RNT_GENERATION(%edi),%esi /* get generation (0 if being changed) */
+ testl %esi,%esi /* if being changed, loop until stable */
+ jz Lslow
+ pushl %esi /* save generation */
+ pushl RNT_SHIFT(%edi) /* save low 32 bits of tscFreq */
+
+ lfence
+ rdtsc /* get TSC in %edx:%eax */
+ lfence
+ subl RNT_TSC_BASE(%edi),%eax
+ sbbl RNT_TSC_BASE+4(%edi),%edx
+
+ /*
+ * Do the math to convert tsc ticks to nanoseconds. We first
+ * do a long multiply of the tsc delta by 1 billion, then a
+ * 96-bit by 32-bit long division by the tsc frequency.
+ */
+ mov $1000000000, %ecx /* number of nanoseconds in a second */
+ mov %edx, %ebx /* save the high word of the tsc delta */
+ mul %ecx /* low word * 10**9 */
+ mov %edx, %edi /* save the overflow into the middle word */
+ mov %eax, %esi /* low word of the 96-bit product */
+ mov %ebx, %eax /* recover the high word */
+ mul %ecx /* high word * 10**9 */
+ add %edi, %eax /* fold in the overflow from the low multiply */
+ adc $0, %edx /* result in edx:eax:esi */
+ mov %eax, %edi /* save the middle word */
+ popl %ecx /* get low 32 tscFreq */
+ xor %eax, %eax
+ xchg %edx, %eax /* edx:eax = 0:top word */
+ div %ecx /* quotient is overflow; the remainder carries down */
+ xor %eax, %eax
+ mov %edi, %eax /* edx:eax = remainder:middle word */
+ div %ecx /* high word of the 64-bit quotient */
+ mov %eax, %ebx /* save it */
+ mov %esi, %eax /* edx:eax = remainder:low word */
+ div %ecx /* low word of the 64-bit quotient */
+ mov %ebx, %edx /* result in edx:eax */
+
+ movl 8(%ebp),%edi /* recover ptr to rtc_nanotime_info */
+ popl %esi /* recover generation */
+
+ addl RNT_NS_BASE(%edi),%eax
+ adcl RNT_NS_BASE+4(%edi),%edx
+
+ cmpl RNT_GENERATION(%edi),%esi /* have the parameters changed? */
+ jne Lslow /* yes, loop until stable */
+
+ pop %ebx
+ pop %edi
+ pop %esi
+ pop %ebp
+ ret /* result in edx:eax */
- ret
-
-ENTRY(ml_phys_write_half)
-
- movl S_ARG0, %ecx
- movl S_ARG1, %eax
- movw %eax, 0(%ecx)
-
- ret
-/* Write physical address word
- *
- * void ml_phys_write(vm_offset_t paddr, unsigned int data)
- * void ml_phys_write_64(addr64_t paddr, unsigned int data)
- * void ml_phys_write_word(vm_offset_t paddr, unsigned int data)
- * void ml_phys_write_word_64(addr64_t paddr, unsigned int data)
- *
- * Write the word at physical address paddr. Memory should not be cache inhibited.
+/*
+ * Timing routines.
*/
-ENTRY(ml_phys_write_64)
-ENTRY(ml_phys_write_word_64)
-
- /* Only use lower 32 bits of address for now */
- movl S_ARG0, %ecx
- movl S_ARG2, %eax
- movl %eax, 0(%ecx)
-
+Entry(timer_update)
+ movl 4(%esp),%ecx /* timer */
+ movl 8(%esp),%eax /* new high word */
+ movl 12(%esp),%edx /* new low word */
+ movl %eax,TIMER_HIGHCHK(%ecx) /* write the check word first */
+ movl %edx,TIMER_LOW(%ecx)
+ movl %eax,TIMER_HIGH(%ecx) /* write the high word last */
ret
-ENTRY(ml_phys_write)
-ENTRY(ml_phys_write_word)
-
- movl S_ARG0, %ecx
- movl S_ARG1, %eax
- movl %eax, 0(%ecx)
-
+Entry(timer_grab)
+ movl 4(%esp),%ecx /* timer */
+0: movl TIMER_HIGH(%ecx),%edx /* read the high word first */
+ movl TIMER_LOW(%ecx),%eax
+ cmpl TIMER_HIGHCHK(%ecx),%edx /* check word mismatch means a torn read */
+ jne 0b /* yes, retry */
ret
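+
+/*
+ * The store/load ordering above is what makes the lock-free 64-bit read
+ * safe. A C sketch of the protocol (hypothetical field names; the real
+ * offsets come from assym.s):
+ *
+ *	void timer_update(timer_t *t, uint32_t high, uint32_t low)
+ *	{
+ *		t->highchk = high;	// publish the check word first
+ *		t->low = low;
+ *		t->high = high;		// publish the high word last
+ *	}
+ *
+ *	uint64_t timer_grab(timer_t *t)
+ *	{
+ *		uint32_t high, low;
+ *		do {
+ *			high = t->high;	// read the high word first
+ *			low = t->low;
+ *		} while (high != t->highchk);	// retry on a torn read
+ *		return ((uint64_t)high << 32) | low;
+ *	}
+ */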
-/* Write physical address double word
- *
- * void ml_phys_write_double(vm_offset_t paddr, unsigned long long data)
- * void ml_phys_write_double_64(addr64_t paddr, unsigned long long data)
- *
- * Write the double word at physical address paddr. Memory should not be cache inhibited.
- */
-ENTRY(ml_phys_write_double_64)
-
- /* Only use lower 32 bits of address for now */
- movl S_ARG0, %ecx
- movl S_ARG2, %eax
- movl %eax, 0(%ecx)
- movl S_ARG3, %eax
- movl %eax, 4(%ecx)
-
- ret
+Entry(call_continuation)
+ movl S_ARG0,%eax /* get continuation */
+ movl S_ARG1,%edx /* continuation param */
+ movl S_ARG2,%ecx /* wait result */
+ movl %gs:CPU_KERNEL_STACK,%esp /* pop the stack */
+ xorl %ebp,%ebp /* zero frame pointer */
+ subl $8,%esp /* align the stack */
+ pushl %ecx
+ pushl %edx
+ call *%eax /* call continuation */
+ addl $16,%esp
+ movl %gs:CPU_ACTIVE_THREAD,%eax
+ pushl %eax
+ call EXT(thread_terminate)
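+ /*NOTREACHED*/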
-ENTRY(ml_phys_write_double)
- movl S_ARG0, %ecx
- movl S_ARG1, %eax
- movl %eax, 0(%ecx)
- movl S_ARG2, %eax
- movl %eax, 4(%ecx)
-
- ret