/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <i386/asm.h>
#include <i386/rtclock.h>
#include <i386/proc_reg.h>
#include <i386/eflags.h>

#include <i386/postcode.h>
#include <i386/apic.h>
#include <assym.s>

/*
**      ml_get_timebase()
**
**      Entry   - %esp contains pointer to 64 bit structure.
**
**      Exit    - 64 bit structure filled in.
**
*/
ENTRY(ml_get_timebase)

        movl    S_ARG0, %ecx

        lfence
        rdtsc
        lfence

        movl    %edx, 0(%ecx)
        movl    %eax, 4(%ecx)

        ret

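/*
 * Illustrative sketch (not part of the build): a hypothetical C equivalent of
 * ml_get_timebase() above.  The stores above place %edx (the high 32 TSC bits)
 * at offset 0 and %eax (the low 32 bits) at offset 4, so the sketch assumes a
 * structure laid out high word first.
 *
 *      typedef struct { uint32_t hi; uint32_t lo; } timebase64_t;  // assumed layout
 *
 *      static void ml_get_timebase_c(timebase64_t *tbp)
 *      {
 *              uint32_t lo, hi;
 *              // lfence on either side of rdtsc keeps the TSC read from being
 *              // reordered with surrounding loads, matching the code above.
 *              __asm__ volatile("lfence; rdtsc; lfence" : "=a" (lo), "=d" (hi));
 *              tbp->hi = hi;
 *              tbp->lo = lo;
 *      }
 */
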
/*
 *      Convert between various timer units
 *
 *      uint64_t tmrCvt(uint64_t time, uint64_t conversion)
 *
 *      This code converts 64-bit time units to other units.
 *      For example, the TSC is converted to HPET units.
 *
 *      Time is a 64-bit integer that is some number of ticks.
 *      Conversion is a 64-bit fixed-point number which is composed
 *      of a 32-bit integer and a 32-bit fraction.
 *
 *      The time ticks are multiplied by the conversion factor.  The
 *      calculation is done as a 128-bit value but both the high
 *      and low words are dropped.  The high word is overflow and the
 *      low word is the fraction part of the result.
 *
 *      We return a 64-bit value.
 *
 *      Note that we can use this function to multiply 2 conversion factors.
 *      We do this in order to calculate the multiplier used to convert
 *      directly between any two units.
 *
 */

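/*
 * Illustrative sketch (not part of the build): a hypothetical C equivalent of
 * the 64 x 64.32 fixed-point multiply below, assuming a compiler that provides
 * __uint128_t.  The middle 64 bits of the 128-bit product are the result; the
 * top 32 bits are overflow and the bottom 32 bits are the discarded fraction.
 *
 *      static uint64_t tmrCvt_c(uint64_t time, uint64_t conversion)
 *      {
 *              return (uint64_t)(((__uint128_t)time * conversion) >> 32);
 *      }
 */
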
        .globl  EXT(tmrCvt)
        .align  FALIGN

LEXT(tmrCvt)

        pushl   %ebp                    // Save a volatile
        movl    %esp,%ebp               // Get the parameters - 8
        pushl   %ebx                    // Save a volatile
        pushl   %esi                    // Save a volatile
        pushl   %edi                    // Save a volatile

        //      %ebp + 8        - low-order ts
        //      %ebp + 12       - high-order ts
        //      %ebp + 16       - low-order cvt
        //      %ebp + 20       - high-order cvt

        movl    8(%ebp),%eax            // Get low-order ts
        mull    16(%ebp)                // Multiply by low-order conversion
        movl    %edx,%edi               // Need to save only the high-order part

        movl    12(%ebp),%eax           // Get the high-order ts
        mull    16(%ebp)                // Multiply by low-order conversion
        addl    %eax,%edi               // Add in the overflow from the low x low calculation
        adcl    $0,%edx                 // Add in any overflow to the high high part
        movl    %edx,%esi               // Save the high high part

//      We now have the upper 64 bits of the 96-bit multiply of ts and the low half of cvt
//      in %esi:%edi

        movl    8(%ebp),%eax            // Get low-order ts
        mull    20(%ebp)                // Multiply by high-order conversion
        movl    %eax,%ebx               // Need to save the low-order part
        movl    %edx,%ecx               // Need to save the high-order part

        movl    12(%ebp),%eax           // Get the high-order ts
        mull    20(%ebp)                // Multiply by high-order conversion

//      Now have %ecx:%ebx as the low x high product and %edx:%eax as the high x high product
//      We don't care about the highest word since it is overflow

        addl    %edi,%ebx               // Add the low words
        adcl    %ecx,%esi               // Add in the high plus carry from low
        addl    %eax,%esi               // Add in the rest of the high

        movl    %ebx,%eax               // Pass back the low word
        movl    %esi,%edx               // and the high word

        popl    %edi                    // Restore a volatile
        popl    %esi                    // Restore a volatile
        popl    %ebx                    // Restore a volatile
        popl    %ebp                    // Restore a volatile

        ret                             // Leave...


/* void _rtc_nanotime_store(uint64_t       tsc,
 *                          uint64_t       nsec,
 *                          uint32_t       scale,
 *                          uint32_t       shift,
 *                          rtc_nanotime_t *dst);
 */
        .globl  EXT(_rtc_nanotime_store)
        .align  FALIGN

LEXT(_rtc_nanotime_store)
        push    %ebp
        movl    %esp,%ebp
        push    %esi

        mov     32(%ebp),%edx                   /* get ptr to rtc_nanotime_info */

        movl    RNT_GENERATION(%edx),%esi       /* get current generation */
        movl    $0,RNT_GENERATION(%edx)         /* flag data as being updated */

        mov     8(%ebp),%eax
        mov     %eax,RNT_TSC_BASE(%edx)
        mov     12(%ebp),%eax
        mov     %eax,RNT_TSC_BASE+4(%edx)

        mov     24(%ebp),%eax
        mov     %eax,RNT_SCALE(%edx)

        mov     28(%ebp),%eax
        mov     %eax,RNT_SHIFT(%edx)

        mov     16(%ebp),%eax
        mov     %eax,RNT_NS_BASE(%edx)
        mov     20(%ebp),%eax
        mov     %eax,RNT_NS_BASE+4(%edx)

        incl    %esi                            /* next generation */
        jnz     1f
        incl    %esi                            /* skip 0, which is a flag */
1:      movl    %esi,RNT_GENERATION(%edx)       /* update generation and make usable */

        pop     %esi
        pop     %ebp
        ret

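/*
 * Illustrative sketch (not part of the build): a hypothetical C rendering of
 * the update protocol above.  The field names follow the RNT_* offsets used
 * here and are assumptions about the underlying rtc_nanotime_t layout.
 *
 *      static void rtc_nanotime_store_c(uint64_t tsc, uint64_t nsec,
 *                                       uint32_t scale, uint32_t shift,
 *                                       rtc_nanotime_t *dst)
 *      {
 *              uint32_t next = dst->generation + 1;
 *
 *              dst->generation = 0;            // flag data as being updated
 *              dst->tsc_base   = tsc;
 *              dst->scale      = scale;
 *              dst->shift      = shift;
 *              dst->ns_base    = nsec;
 *              if (next == 0)
 *                      next = 1;               // skip 0, which is the "updating" flag
 *              dst->generation = next;         // publish; readers may now use the data
 *      }
 */
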

/* uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp, int slow);
 *
 * This is the same as the commpage nanotime routine, except that it uses the
 * kernel internal "rtc_nanotime_info" data instead of the commpage data.  The two
 * copies of data (one in the kernel and one in user space) are kept in sync by
 * rtc_clock_napped().
 *
 * Warning!  There is another copy of this code in osfmk/i386/locore.s.  The
 * two versions must be kept in sync with each other!
 *
 * There are actually two versions of the algorithm, one each for "slow" and "fast"
 * processors.  The more common "fast" algorithm is:
 *
 *      nanoseconds = (((rdtsc - rnt_tsc_base) * rnt_tsc_scale) / 2**32) + rnt_ns_base;
 *
 * Of course, the divide by 2**32 is a nop.  rnt_tsc_scale is a constant computed
 * during initialization:
 *
 *      rnt_tsc_scale = (10**9 * 2**32) / tscFreq;
 *
 * The "slow" algorithm uses long division:
 *
 *      nanoseconds = (((rdtsc - rnt_tsc_base) * 10**9) / tscFreq) + rnt_ns_base;
 *
 * Since this routine is not synchronized and can be called in any context,
 * we use a generation count to guard against seeing partially updated data.
 * In addition, the _rtc_nanotime_store() routine -- just above -- zeroes the
 * generation before updating the data, and stores the nonzero generation only
 * after all other data has been stored.  Because IA32 guarantees that stores
 * by one processor must be seen in order by another, we can avoid using a lock.
 * We spin while the generation is zero.
 *
 * In accordance with the ABI, we return the 64-bit nanotime in %edx:%eax.
 */

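/*
 * Illustrative sketch (not part of the build): a hypothetical C rendering of the
 * "fast" algorithm and the generation-count retry loop described above.  The
 * field names are assumptions about the rtc_nanotime_t layout implied by the
 * RNT_* offsets; rdtsc64() stands in for the lfence/rdtsc/lfence sequence.
 *
 *      static uint64_t rtc_nanotime_read_fast_c(const rtc_nanotime_t *rntp)
 *      {
 *              uint32_t gen;
 *              uint64_t ns;
 *
 *              do {
 *                      while ((gen = rntp->generation) == 0)
 *                              ;                       // store in progress; spin
 *                      uint64_t delta = rdtsc64() - rntp->tsc_base;
 *                      ns = (uint64_t)(((__uint128_t)delta * rntp->scale) >> 32)
 *                           + rntp->ns_base;
 *              } while (gen != rntp->generation);      // retry if updated underneath us
 *              return ns;
 *      }
 */
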
        .globl  EXT(_rtc_nanotime_read)
        .align  FALIGN
LEXT(_rtc_nanotime_read)
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %esi
        pushl   %edi
        pushl   %ebx
        movl    8(%ebp),%edi            /* get ptr to rtc_nanotime_info */
        movl    12(%ebp),%eax           /* get "slow" flag */
        testl   %eax,%eax
        jnz     Lslow

        /* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD */
        RTC_NANOTIME_READ_FAST()

        popl    %ebx
        popl    %edi
        popl    %esi
        popl    %ebp
        ret

        /* Processor whose TSC frequency is slower than or equal to SLOW_TSC_THRESHOLD */
Lslow:
        movl    RNT_GENERATION(%edi),%esi       /* get generation (0 if being changed) */
        testl   %esi,%esi                       /* if being changed, loop until stable */
        jz      Lslow
        pushl   %esi                            /* save generation */
        pushl   RNT_SHIFT(%edi)                 /* save low 32 bits of tscFreq */

        lfence
        rdtsc                                   /* get TSC in %edx:%eax */
        lfence
        subl    RNT_TSC_BASE(%edi),%eax
        sbbl    RNT_TSC_BASE+4(%edi),%edx

        /*
         * Do the math to convert tsc ticks to nanoseconds.  We first
         * do a long multiply of 1 billion times the tsc.  Then we do
         * a long division by the tsc frequency.
         */
        mov     $1000000000, %ecx               /* number of nanoseconds in a second */
        mov     %edx, %ebx
        mul     %ecx
        mov     %edx, %edi
        mov     %eax, %esi
        mov     %ebx, %eax
        mul     %ecx
        add     %edi, %eax
        adc     $0, %edx                        /* result in edx:eax:esi */
        mov     %eax, %edi
        popl    %ecx                            /* get low 32 bits of tscFreq */
        xor     %eax, %eax
        xchg    %edx, %eax
        div     %ecx
        xor     %eax, %eax
        mov     %edi, %eax
        div     %ecx
        mov     %eax, %ebx
        mov     %esi, %eax
        div     %ecx
        mov     %ebx, %edx                      /* result in edx:eax */

        movl    8(%ebp),%edi                    /* recover ptr to rtc_nanotime_info */
        popl    %esi                            /* recover generation */

        addl    RNT_NS_BASE(%edi),%eax
        adcl    RNT_NS_BASE+4(%edi),%edx

        cmpl    RNT_GENERATION(%edi),%esi       /* have the parameters changed? */
        jne     Lslow                           /* yes, loop until stable */

        pop     %ebx
        pop     %edi
        pop     %esi
        pop     %ebp
        ret                                     /* result in edx:eax */

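/*
 * Illustrative sketch (not part of the build): a hypothetical C rendering of the
 * "slow" algorithm above, which long-multiplies the TSC delta by 10**9 and then
 * long-divides by the TSC frequency.  The 128-bit type is an assumption used to
 * stand in for the chained mul/div sequence; tsc_freq plays the role of the low
 * 32 bits of tscFreq saved from RNT_SHIFT above.
 *
 *      static uint64_t tsc_delta_to_ns_slow_c(uint64_t delta, uint32_t tsc_freq,
 *                                             uint64_t ns_base)
 *      {
 *              return (uint64_t)(((__uint128_t)delta * 1000000000ULL) / tsc_freq)
 *                     + ns_base;
 *      }
 */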