X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/e5568f75972dfc723778653c11cb6b4dc825716a..7ddcb079202367355dddccdfa4318e57d50318be:/osfmk/i386/machine_routines_asm.s
diff --git a/osfmk/i386/machine_routines_asm.s b/osfmk/i386/machine_routines_asm.s
index eba45c21c..0e3d9fb68 100644
--- a/osfmk/i386/machine_routines_asm.s
+++ b/osfmk/i386/machine_routines_asm.s
@@ -1,25 +1,38 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
 *
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
 *
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
+
 #include
+#include
+#include
+#include
+#include
+#include
+#include

 /*
 ** ml_get_timebase()
@@ -31,262 +44,268 @@
 */
 ENTRY(ml_get_timebase)

-	movl	S_ARG0, %ecx
-
-	rdtsc
-
-	movl	%edx, 0(%ecx)
-	movl	%eax, 4(%ecx)
-
-	ret
-
+	movl	S_ARG0, %ecx
+
+	lfence
+	rdtsc
+	lfence
+
+	movl	%edx, 0(%ecx)
+	movl	%eax, 4(%ecx)
+
+	ret
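
The change above brackets rdtsc with lfence so the timestamp can be neither hoisted nor delayed by out-of-order execution. In C terms the new body behaves roughly like the sketch below; read_tsc_serialized() is an illustrative name, not a kernel symbol, and GCC-style inline assembly is assumed. Note that the assembly routine stores the high word (%edx) at offset 0 of the caller's structure and the low word (%eax) at offset 4.

    #include <stdint.h>

    /* Illustrative equivalent of the lfence/rdtsc/lfence sequence:
     * the fences keep rdtsc from being reordered against surrounding
     * instructions, so the timestamp is taken where the caller
     * expects it to be taken. */
    static inline uint64_t read_tsc_serialized(void)
    {
        uint32_t lo, hi;
        __asm__ volatile("lfence; rdtsc; lfence" : "=a" (lo), "=d" (hi));
        return ((uint64_t)hi << 32) | lo;
    }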
-/* PCI config cycle probing
+/*
+ * Convert between various timer units
 *
- * boolean_t ml_probe_read(vm_offset_t paddr, unsigned int *val)
+ * uint64_t tmrCvt(uint64_t time, uint64_t conversion)
 *
- * Read the memory location at physical address paddr.
- * This is a part of a device probe, so there is a good chance we will
- * have a machine check here. So we have to be able to handle that.
- * We assume that machine checks are enabled both in MSR and HIDs
- */
-ENTRY(ml_probe_read)
-
-	movl	S_ARG0, %ecx
-	movl	S_ARG1, %eax
-	movl	0(%ecx), %ecx
-	movl	%ecx, 0(%eax)
-	movl	$1, %eax
-
-	ret
-
-
-/* PCI config cycle probing - 64-bit
+ * This code converts 64-bit time units to other units.
+ * For example, the TSC is converted to HPET units.
 *
- * boolean_t ml_probe_read_64(addr64_t paddr, unsigned int *val)
+ * Time is a 64-bit integer that is some number of ticks.
+ * The conversion factor is a 64-bit fixed-point number with a
+ * 32-bit integer part and a 32-bit fraction.
 *
- * Read the memory location at physical address paddr.
- * This is a part of a device probe, so there is a good chance we will
- * have a machine check here. So we have to be able to handle that.
- * We assume that machine checks are enabled both in MSR and HIDs
- */
-ENTRY(ml_probe_read_64)
-
-	/* Only use lower 32 bits of address for now */
-	movl	S_ARG0, %ecx
-	movl	S_ARG2, %eax
-	movl	0(%ecx), %ecx
-	movl	%ecx, 0(%eax)
-	movl	$1, %eax
-
-	ret
-
-
-/* Read physical address byte
+ * The tick count is multiplied by the conversion factor to form a
+ * 128-bit product, and the top and bottom 32-bit words are dropped:
+ * the top word is overflow and the bottom word is the fraction part
+ * of the result.
+ *
+ * We return a 64-bit value.
 *
- * unsigned int ml_phys_read_byte(vm_offset_t paddr)
- * unsigned int ml_phys_read_byte_64(addr64_t paddr)
+ * Note that we can use this function to multiply two conversion factors.
+ * We do this in order to calculate the multiplier used to convert
+ * directly between any two units.
 *
- * Read the byte at physical address paddr. Memory should not be cache inhibited.
 */
-ENTRY(ml_phys_read_byte_64)
 
-	/* Only use lower 32 bits of address for now */
-	movl	S_ARG0, %ecx
-	xor	%eax, %eax
-	movb	0(%ecx), %eax
+	.globl	EXT(tmrCvt)
+	.align	FALIGN
+
+LEXT(tmrCvt)
+
+	pushl	%ebp		// Save a non-volatile (callee-saved) register
+	movl	%esp,%ebp	// Set up the frame pointer; args start at 8(%ebp)
+	pushl	%ebx		// Save a non-volatile register
+	pushl	%esi		// Save a non-volatile register
+	pushl	%edi		// Save a non-volatile register
+
+// %ebp + 8 - low-order ts
+// %ebp + 12 - high-order ts
+// %ebp + 16 - low-order cvt
+// %ebp + 20 - high-order cvt
+
+	movl	8(%ebp),%eax	// Get low-order ts
+	mull	16(%ebp)	// Multiply by low-order conversion
+	movl	%edx,%edi	// Need to save only the high order part
+
+	movl	12(%ebp),%eax	// Get the high-order ts
+	mull	16(%ebp)	// Multiply by low-order conversion
+	addl	%eax,%edi	// Add in the overflow from the low x low calculation
+	adcl	$0,%edx		// Propagate any carry into the high word
+	movl	%edx,%esi	// Save the high word
+
+// We now have the upper 64 bits of the 96 bit multiply of ts and the low half of cvt
+// in %esi:%edi
+
+	movl	8(%ebp),%eax	// Get low-order ts
+	mull	20(%ebp)	// Multiply by high-order conversion
+	movl	%eax,%ebx	// Need to save the low order part
+	movl	%edx,%ecx	// Need to save the high order part
+
+	movl	12(%ebp),%eax	// Get the high-order ts
+	mull	20(%ebp)	// Multiply by high-order conversion
+
+// Now %ecx:%ebx holds ts-low times cvt-high, and %edx:%eax holds ts-high times cvt-high
+// We don't care about the highest word since it is overflow
+
+	addl	%edi,%ebx	// Add the low words
+	adcl	%ecx,%esi	// Add in the high plus carry from low
+	addl	%eax,%esi	// Add in the rest of the high
+
+	movl	%ebx,%eax	// Pass back low word
+	movl	%esi,%edx	// and the high word
+
+	popl	%edi		// Restore a non-volatile register
+	popl	%esi		// Restore a non-volatile register
+	popl	%ebx		// Restore a non-volatile register
+	popl	%ebp		// Restore a non-volatile register
+
+	ret			// Leave...
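
The word-by-word mull/addl sequence above computes the middle 64 bits of a 64x64-bit multiply. Assuming a compiler that provides unsigned __int128, the whole routine reduces to the C sketch below (tmr_cvt_sketch is an illustrative name, not the kernel symbol):

    #include <stdint.h>

    /* Multiply a 64-bit tick count by a 32.32 fixed-point conversion
     * factor and keep the middle 64 bits of the 128-bit product: the
     * top 32 bits are overflow and the bottom 32 bits are the
     * discarded fraction, exactly as the assembly computes word by
     * word. */
    static uint64_t tmr_cvt_sketch(uint64_t time, uint64_t conversion)
    {
        unsigned __int128 product = (unsigned __int128)time * conversion;
        return (uint64_t)(product >> 32);
    }

Passing one conversion factor as the time argument multiplies two factors together, which is how a direct unit-to-unit multiplier can be precomputed, as the comment above notes.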
+
+
+/* void _rtc_nanotime_adjust(
+		uint64_t	tsc_base_delta,
+		rtc_nanotime_t	*dst);
+*/
+	.globl	EXT(_rtc_nanotime_adjust)
+	.align	FALIGN

-	ret
 
+LEXT(_rtc_nanotime_adjust)
+	mov	12(%esp),%edx			/* ptr to rtc_nanotime_info */
+
+	movl	RNT_GENERATION(%edx),%ecx	/* get current generation */
+	movl	$0,RNT_GENERATION(%edx)		/* flag data as being updated */
-ENTRY(ml_phys_read_byte)
+	movl	4(%esp),%eax			/* get lower 32-bits of delta */
+	addl	%eax,RNT_TSC_BASE(%edx)
+	adcl	$0,RNT_TSC_BASE+4(%edx)		/* propagate carry */
-	movl	S_ARG0, %ecx
-	xor	%eax, %eax
-	movb	0(%ecx), %eax
+	incl	%ecx				/* next generation */
+	jnz	1f
+	incl	%ecx				/* skip 0, which is a flag */
+1:	movl	%ecx,RNT_GENERATION(%edx)	/* update generation and make usable */
 
 	ret
 
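
The generation field acts as a sequence count: readers treat zero as "update in progress" and retry when the value changes. A rough C sketch of the writer side follows; the struct and names are illustrative stand-ins for the RNT_* assym offsets, and the lock-free publication relies on x86's ordered stores (a portable version would need explicit atomics or barriers).

    #include <stdint.h>

    struct rtc_nanotime_sketch {
        volatile uint32_t generation;   /* 0 means update in progress */
        uint64_t          tsc_base;
        uint64_t          ns_base;
    };

    /* Writer side, mirroring _rtc_nanotime_adjust() above. */
    static void rtc_nanotime_adjust_sketch(struct rtc_nanotime_sketch *r,
                                           uint64_t tsc_base_delta)
    {
        uint32_t gen = r->generation;

        r->generation = 0;                        /* readers now spin */
        r->tsc_base += (uint32_t)tsc_base_delta;  /* the asm adds only the
                                                     low 32 bits, with
                                                     carry propagation */
        if (++gen == 0)
            ++gen;                                /* skip 0, the flag */
        r->generation = gen;                      /* publish */
    }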
-/* Read physical address half word
+/* uint64_t _rtc_nanotime_read( rtc_nanotime_t *rntp, int slow );
 *
- * unsigned int ml_phys_read_half(vm_offset_t paddr)
- * unsigned int ml_phys_read_half_64(addr64_t paddr)
+ * This is the same as the commpage nanotime routine, except that it uses the
+ * kernel internal "rtc_nanotime_info" data instead of the commpage data. The two copies
+ * of data (one in the kernel and one in user space) are kept in sync by rtc_clock_napped().
 *
- * Read the half word at physical address paddr. Memory should not be cache inhibited.
- */
-ENTRY(ml_phys_read_half_64)
-
-	/* Only use lower 32 bits of address for now */
-	movl	S_ARG0, %ecx
-	xor	%eax, %eax
-	movw	0(%ecx), %eax
-
-	ret
-
-ENTRY(ml_phys_read_half)
-
-	movl	S_ARG0, %ecx
-	xor	%eax, %eax
-	movw	0(%ecx), %eax
-
-	ret
-
-
-/* Read physical address word
+ * Warning! There is another copy of this code in osfmk/i386/locore.s. The
+ * two versions must be kept in sync with each other!
 *
- * unsigned int ml_phys_read(vm_offset_t paddr)
- * unsigned int ml_phys_read_64(addr64_t paddr)
- * unsigned int ml_phys_read_word(vm_offset_t paddr)
- * unsigned int ml_phys_read_word_64(addr64_t paddr)
+ * There are actually two versions of the algorithm, one each for "slow" and "fast"
+ * processors. The more common "fast" algorithm is:
 *
- * Read the word at physical address paddr. Memory should not be cache inhibited.
- */
-ENTRY(ml_phys_read_64)
-ENTRY(ml_phys_read_word_64)
-
-	/* Only use lower 32 bits of address for now */
-	movl	S_ARG0, %ecx
-	movl	0(%ecx), %eax
-
-	ret
-
-ENTRY(ml_phys_read)
-ENTRY(ml_phys_read_word)
-
-	movl	S_ARG0, %ecx
-	movl	0(%ecx), %eax
-
-	ret
-
-
-/* Read physical address double
+ * nanoseconds = (((rdtsc - rnt_tsc_base) * rnt_tsc_scale) / 2**32) + rnt_ns_base;
 *
- * unsigned long long ml_phys_read_double(vm_offset_t paddr)
- * unsigned long long ml_phys_read_double_64(addr64_t paddr)
+ * Of course, the divide by 2**32 is a nop. rnt_tsc_scale is a constant computed during initialization:
 *
- * Read the double word at physical address paddr. Memory should not be cache inhibited.
- */
-ENTRY(ml_phys_read_double_64)
-
-	/* Only use lower 32 bits of address for now */
-	movl	S_ARG0, %ecx
-	movl	0(%ecx), %eax
-	movl	4(%ecx), %edx
-
-	ret
-
-ENTRY(ml_phys_read_double)
-
-	movl	S_ARG0, %ecx
-	movl	0(%ecx), %eax
-	movl	4(%ecx), %edx
-
-	ret
-
-
-/* Write physical address byte
+ * rnt_tsc_scale = (10^9 * 2**32) / tscFreq;
 *
- * void ml_phys_write_byte(vm_offset_t paddr, unsigned int data)
- * void ml_phys_write_byte_64(addr64_t paddr, unsigned int data)
+ * The "slow" algorithm uses long division:
 *
- * Write the byte at physical address paddr. Memory should not be cache inhibited.
- */
-ENTRY(ml_phys_write_byte_64)
-
-	/* Only use lower 32 bits of address for now */
-	movl	S_ARG0, %ecx
-	movl	S_ARG2, %eax
-	movb	%eax, 0(%ecx)
-
-	ret
-
-ENTRY(ml_phys_write_byte)
-
-	movl	S_ARG0, %ecx
-	movl	S_ARG1, %eax
-	movb	%eax, 0(%ecx)
-
-	ret
-
-
-/* Write physical address half word
+ * nanoseconds = (((rdtsc - rnt_tsc_base) * 10^9) / tscFreq) + rnt_ns_base;
 *
- * void ml_phys_write_half(vm_offset_t paddr, unsigned int data)
- * void ml_phys_write_half_64(addr64_t paddr, unsigned int data)
+ * Since this routine is not synchronized and can be called in any context,
+ * we use a generation count to guard against seeing partially updated data. In addition,
+ * the update routines (_rtc_nanotime_store(), and _rtc_nanotime_adjust() just above) zero
+ * the generation before updating the data, and store the nonzero generation only after
+ * all other data has been stored. Because IA32 guarantees that stores by one processor
+ * must be seen in order by another, we can avoid using a lock. We spin while the
+ * generation is zero.
 *
- * Write the byte at physical address paddr. Memory should not be cache inhibited.
+ * In accordance with the ABI, we return the 64-bit nanotime in %edx:%eax.
 */
-ENTRY(ml_phys_write_half_64)
-
-	/* Only use lower 32 bits of address for now */
-	movl	S_ARG0, %ecx
-	movl	S_ARG2, %eax
-	movw	%eax, 0(%ecx)
+
+	.globl	EXT(_rtc_nanotime_read)
+	.align	FALIGN
+LEXT(_rtc_nanotime_read)
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+	movl	8(%ebp),%edi	/* get ptr to rtc_nanotime_info */
+	movl	12(%ebp),%eax	/* get "slow" flag */
+	testl	%eax,%eax
+	jnz	Lslow
+
+	/* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD */
+	PAL_RTC_NANOTIME_READ_FAST()
+
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+	popl	%ebp
+	ret
+
+	/* Processor whose TSC frequency is slower than or equal to SLOW_TSC_THRESHOLD */
+Lslow:
+	movl	RNT_GENERATION(%edi),%esi	/* get generation (0 if being changed) */
+	testl	%esi,%esi			/* if being changed, loop until stable */
+	jz	Lslow
+	pushl	%esi				/* save generation */
+	pushl	RNT_SHIFT(%edi)			/* save low 32 bits of tscFreq */
+
+	lfence
+	rdtsc					/* get TSC in %edx:%eax */
+	lfence
+	subl	RNT_TSC_BASE(%edi),%eax
+	sbbl	RNT_TSC_BASE+4(%edi),%edx
+
+	/*
+	 * Do the math to convert tsc ticks to nanoseconds. We first
+	 * do a long multiply of 1 billion times the tsc. Then we do
+	 * a long division by the tsc frequency.
+	 */
+	mov	$1000000000, %ecx	/* number of nanoseconds in a second */
+	mov	%edx, %ebx
+	mul	%ecx
+	mov	%edx, %edi
+	mov	%eax, %esi
+	mov	%ebx, %eax
+	mul	%ecx
+	add	%edi, %eax
+	adc	$0, %edx		/* result in edx:eax:esi */
+	mov	%eax, %edi
+	popl	%ecx			/* get low 32 bits of tscFreq */
+	xor	%eax, %eax
+	xchg	%edx, %eax
+	div	%ecx
+	xor	%eax, %eax
+	mov	%edi, %eax
+	div	%ecx
+	mov	%eax, %ebx
+	mov	%esi, %eax
+	div	%ecx
+	mov	%ebx, %edx		/* result in edx:eax */
+
+	movl	8(%ebp),%edi		/* recover ptr to rtc_nanotime_info */
+	popl	%esi			/* recover generation */
+
+	addl	RNT_NS_BASE(%edi),%eax
+	adcl	RNT_NS_BASE+4(%edi),%edx
+
+	cmpl	RNT_GENERATION(%edi),%esi	/* have the parameters changed?
*/ + jne Lslow /* yes, loop until stable */ + + pop %ebx + pop %edi + pop %esi + pop %ebp + ret /* result in edx:eax */ - ret - -ENTRY(ml_phys_write_half) - - movl S_ARG0, %ecx - movl S_ARG1, %eax - movw %eax, 0(%ecx) - - ret -/* Write physical address word - * - * void ml_phys_write(vm_offset_t paddr, unsigned int data) - * void ml_phys_write_64(addr64_t paddr, unsigned int data) - * void ml_phys_write_word(vm_offset_t paddr, unsigned int data) - * void ml_phys_write_word_64(addr64_t paddr, unsigned int data) - * - * Write the word at physical address paddr. Memory should not be cache inhibited. +/* + * Timing routines. */ -ENTRY(ml_phys_write_64) -ENTRY(ml_phys_write_word_64) - - /* Only use lower 32 bits of address for now */ - movl S_ARG0, %ecx - movl S_ARG2, %eax - movl %eax, 0(%ecx) - +Entry(timer_update) + movl 4(%esp),%ecx + movl 8(%esp),%eax + movl 12(%esp),%edx + movl %eax,TIMER_HIGHCHK(%ecx) + movl %edx,TIMER_LOW(%ecx) + movl %eax,TIMER_HIGH(%ecx) ret -ENTRY(ml_phys_write) -ENTRY(ml_phys_write_word) - - movl S_ARG0, %ecx - movl S_ARG1, %eax - movl %eax, 0(%ecx) - +Entry(timer_grab) + movl 4(%esp),%ecx +0: movl TIMER_HIGH(%ecx),%edx + movl TIMER_LOW(%ecx),%eax + cmpl TIMER_HIGHCHK(%ecx),%edx + jne 0b ret -/* Write physical address double word - * - * void ml_phys_write_double(vm_offset_t paddr, unsigned long long data) - * void ml_phys_write_double_64(addr64_t paddr, unsigned long long data) - * - * Write the double word at physical address paddr. Memory should not be cache inhibited. - */ -ENTRY(ml_phys_write_double_64) - - /* Only use lower 32 bits of address for now */ - movl S_ARG0, %ecx - movl S_ARG2, %eax - movl %eax, 0(%ecx) - movl S_ARG3, %eax - movl %eax, 4(%ecx) - - ret +Entry(call_continuation) + movl S_ARG0,%eax /* get continuation */ + movl S_ARG1,%edx /* continuation param */ + movl S_ARG2,%ecx /* wait result */ + movl %gs:CPU_KERNEL_STACK,%esp /* pop the stack */ + xorl %ebp,%ebp /* zero frame pointer */ + subl $8,%esp /* align the stack */ + pushl %ecx + pushl %edx + call *%eax /* call continuation */ + addl $16,%esp + movl %gs:CPU_ACTIVE_THREAD,%eax + pushl %eax + call EXT(thread_terminate) -ENTRY(ml_phys_write_double) - movl S_ARG0, %ecx - movl S_ARG1, %eax - movl %eax, 0(%ecx) - movl S_ARG2, %eax - movl %eax, 4(%ecx) - - ret
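
For reference, the Lslow path of _rtc_nanotime_read() above (spin while the generation is zero, long multiply by 10^9, long divide by the TSC frequency, retry if the generation moved) can be sketched in C as below, reusing the illustrative struct and helper from the earlier sketches; tsc_freq stands in for the low 32 bits of the TSC frequency stashed in RNT_SHIFT, and again the real code relies on x86 store ordering rather than portable barriers.

    #include <stdint.h>

    /* Reader side of the generation protocol, slow-processor variant. */
    static uint64_t rtc_nanotime_read_slow_sketch(struct rtc_nanotime_sketch *r,
                                                  uint32_t tsc_freq)
    {
        uint32_t gen;
        uint64_t ns;

        do {
            while ((gen = r->generation) == 0)
                ;                               /* update in progress */
            unsigned __int128 elapsed =
                read_tsc_serialized() - r->tsc_base;
            ns = (uint64_t)((elapsed * 1000000000u) / tsc_freq)
                 + r->ns_base;
        } while (gen != r->generation);         /* parameters changed? */

        return ns;
    }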