+/* define assembly routines */
+
+
+/*
+ * Inlines to get timestamp counter value.
+ */
+
+/*
+ * rdtsc_64: return the current 64-bit time stamp counter value.
+ *
+ * RDTSC leaves the low 32 bits in EAX and the high 32 bits in EDX.  The two
+ * halves are captured through separate "=a"/"=d" outputs instead of the
+ * combined "A" constraint, because "A" only denotes the EDX:EAX pair on
+ * 32-bit targets; this form yields the same value on i386 and remains
+ * correct if the routine is ever built for x86-64.
+ */
+inline static uint64_t
+rdtsc_64(void)
+{
+	uint32_t lo, hi;
+
+	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
+	return ((uint64_t)hi << 32) | lo;
+}
+
+// create_mul_quant_GHZ create a constant that can be used to multiply
+// the TSC by to create nanoseconds. This is a 32 bit number
+// and the TSC *MUST* have a frequency higher than 1000Mhz for this routine to work
+//
+// The theory here is that we know how many TSCs-per-sec the processor runs at. Normally to convert this
+// to nanoseconds you would multiply the current time stamp by 1000000000 (a billion) then divide
+// by TSCs-per-sec to get nanoseconds. Unfortunately the TSC is 64 bits, which would leave us with a
+// 96 bit intermediate result from the multiply that must then be divided.
+// Usually that's
+// uint96 = tsc * numer
+// nanos = uint96 / denom
+// Instead, we create this quant constant and it becomes the numerator, the denominator
+// can then be 0x100000000 which makes our division as simple as forgetting the lower 32 bits
+// of the result. We can also pass this number to user space as the numer and pass 0xFFFFFFFF
+// as the denom for converting raw counts to nanos. The difference is so small as to be undetectable
+// by anything.
+// Unfortunately we can not do this for sub-GHz processors. In that case, all we do is pass the raw CPU
+// speed as the denom and we pass in 1000000000 as the numerator. No short cuts allowed.
+
+// Returns the low 32 bits of (50000000 * 2^32) / quant.
+// quant must be non-zero; for the quotient to fit in 32 bits it must also
+// exceed 50000000.
+// NOTE(review): 50000000 is presumably the number of nanoseconds in the
+// 1/20 second interval measured by timeRDTSC(), making quant the TSC count
+// for that interval -- confirm against the caller.
+inline static uint32_t
+create_mul_quant_GHZ(uint32_t quant)
+{
+	const uint64_t scaled_numer = 50000000ULL << 32;
+	uint64_t quotient = scaled_numer / quant;
+
+	return (uint32_t)quotient;
+}
+
+// this routine takes a value of raw TSC ticks and applies the passed mul_quant
+// generated by create_mul_quant_GHZ(). This is our internal routine for creating
+// nanoseconds.
+// Since we don't really have uint96_t, this routine basically does this....
+// uint96_t intermediate = value * scale
+// return (intermediate >> 32)
+/*
+ * fast_get_nano_from_abs: return (value * scale) >> 32, where the product is
+ * conceptually 96 bits wide.
+ *
+ * value - raw 64-bit tick count
+ * scale - 32-bit multiplier; its bit pattern is treated as unsigned, as the
+ *         original MULL-based code did
+ *
+ * The previous inline assembly zeroed %ecx even though that register was
+ * declared as an input-only operand, so the compiler was free to assume it
+ * still held scale afterwards.  The same arithmetic is expressed here in
+ * plain C: two 32x32->64 multiplies, with the upper half of the low partial
+ * product folded into the high partial product.  The sum cannot overflow
+ * 64 bits: (2^32-1)^2 + (2^32-1) < 2^64.
+ */
+inline static uint64_t
+fast_get_nano_from_abs(uint64_t value, int scale)
+{
+	const uint64_t mul = (uint32_t)scale;
+	uint64_t lo_prod = (value & 0xFFFFFFFFULL) * mul;
+	uint64_t hi_prod = (value >> 32) * mul;
+
+	return hi_prod + (lo_prod >> 32);
+}
+
+/*
+ * this routine basically does this...
+ * ts.tv_sec = nanos / 1000000000; create seconds
+ * ts.tv_nsec = nanos % 1000000000; create remainder nanos
+ */
+/*
+ * nanos_to_timespec: split a nanosecond count into seconds and leftover
+ * nanoseconds with a single 64-by-32 bit DIVL.
+ *
+ * nanos is loaded into EDX:EAX through the "+A" operand and divided by
+ * NSEC_PER_SEC; the same register pair is then reinterpreted as a
+ * mach_timespec_t through the union.
+ * NOTE(review): this assumes mach_timespec_t is exactly two 32-bit fields
+ * with tv_sec first (quotient, EAX) and tv_nsec second (remainder, EDX) --
+ * confirm against its declaration.
+ * NOTE(review): DIVL is expected to fault if the quotient does not fit in
+ * 32 bits, i.e. if nanos / NSEC_PER_SEC >= 2^32 -- confirm callers never
+ * pass such a value.
+ */
+inline static mach_timespec_t
+nanos_to_timespec(uint64_t nanos)
+{
+ union {
+ mach_timespec_t ts;
+ uint64_t u64;
+ } ret;
+ ret.u64 = nanos;
+ /* divide EDX:EAX by NSEC_PER_SEC; the result overwrites ret.u64 in place */
+ asm volatile("divl %1" : "+A" (ret.u64) : "r" (NSEC_PER_SEC));
+ return ret.ts;
+}
+
+// the following two routines perform the 96 bit arithmetic we need to
+// convert generic absolute<->nanoseconds
+// the multiply routine takes a uint64_t and a uint32_t and returns the result in a
+// uint32_t[3] array. the divide routine takes this uint32_t[3] array and
+// divides it by a uint32_t returning a uint64_t
+/*
+ * longmul: multiply a 64-bit value by a 32-bit value, producing a 96-bit
+ * product.
+ *
+ * abstime      - pointer to the 64-bit multiplier (read only)
+ * multiplicand - 32-bit multiplicand
+ * result       - array of three uint32_t receiving the product, least
+ *                significant word first (result[0] = bits 0-31,
+ *                result[1] = bits 32-63, result[2] = bits 64-95)
+ *
+ * The previous inline assembly overwrote its input registers (%eax, %ecx),
+ * modified %edx and memory without declaring any clobbers, and referenced
+ * its "m" operand after two PUSHLs, which yields the wrong address whenever
+ * the compiler addresses that operand relative to %esp.  The identical
+ * arithmetic is written here in C: two 32x32->64 partial products, with the
+ * carry out of the low product added into the high product.  That sum
+ * cannot overflow 64 bits: (2^32-1)^2 + (2^32-1) < 2^64.
+ */
+inline static void
+longmul(uint64_t *abstime, uint32_t multiplicand, uint32_t *result)
+{
+	uint64_t low_product  = (*abstime & 0xFFFFFFFFULL) * multiplicand;
+	uint64_t high_product = (*abstime >> 32) * multiplicand + (low_product >> 32);
+
+	result[0] = (uint32_t)low_product;
+	result[1] = (uint32_t)high_product;
+	result[2] = (uint32_t)(high_product >> 32);
+}
+
+/*
+ * longdiv: divide the 96-bit value numer[2]:numer[1]:numer[0] (numer[0] is
+ * the least significant word) by denom and return a 64-bit quotient.
+ *
+ * The division is done as two chained 64-by-32 bit DIVLs:
+ *   1. numer[2]:numer[1] / denom  -> high quotient word, remainder in EDX
+ *   2. remainder:numer[0] / denom -> low quotient word
+ * EBX is saved on the stack and used as scratch, first for the numer
+ * pointer and then for the high quotient word.  The final remainder is
+ * discarded.
+ * NOTE(review): the first DIVL is expected to fault when denom is zero or
+ * when numer[2] >= denom (quotient wider than 32 bits) -- confirm callers
+ * guarantee numer[2] < denom.
+ * NOTE(review): no "cc" clobber is listed; GCC on x86 normally treats the
+ * flags as clobbered by every asm -- confirm for the compiler in use.
+ */
+inline static uint64_t
+longdiv(uint32_t *numer, uint32_t denom)
+{
+ uint64_t result;
+ asm volatile(
+ /* preserve EBX, then keep the numer pointer in it */
+ " pushl %%ebx \n\t"
+ " movl %%eax,%%ebx \n\t"
+ /* EDX:EAX = numer[2]:numer[1]; EAX = high quotient, EDX = remainder */
+ " movl 8(%%eax),%%edx \n\t"
+ " movl 4(%%eax),%%eax \n\t"
+ " divl %%ecx \n\t"
+ /* EBX = high quotient, EAX = numer pointer again */
+ " xchg %%ebx,%%eax \n\t"
+ /* EDX:EAX = remainder:numer[0]; EAX = low quotient */
+ " movl (%%eax),%%eax \n\t"
+ " divl %%ecx \n\t"
+ /* EDX = high quotient (the final remainder lands in EBX and is dropped) */
+ " xchg %%ebx,%%edx \n\t"
+ " popl %%ebx \n\t"
+ : "=A"(result) : "a"(numer),"c"(denom));
+ return result;
+}
+
+#define PIT_Mode4 0x08 /* turn on mode 4 one shot software trigger */
+
+// Enable or disable timer 2.
+/*
+ * enable_PIT2: read I/O port 0x61, clear bit 1, set bit 0 and write the
+ * byte back.
+ * NOTE(review): on PC-compatible hardware port 0x61 bit 0 is presumably the
+ * timer 2 gate and bit 1 the speaker data enable -- confirm against the
+ * chipset documentation.
+ */
+inline static void
+enable_PIT2()
+{
+	asm volatile(
+		"inb	$0x61,%%al	\n\t"	/* current control byte */
+		"andb	$0xFD,%%al	\n\t"	/* drop bit 1 */
+		"orb	$0x01,%%al	\n\t"	/* raise bit 0 */
+		"outb	%%al,$0x61	\n\t"
+		: : : "%al" );
+}
+
+/*
+ * disable_PIT2: read I/O port 0x61, clear bits 0 and 1 and write the byte
+ * back.
+ *
+ * The mask used to be 253 (0xFD), which clears only bit 1 and therefore
+ * never undid the bit 0 that enable_PIT2() sets; after "disabling", the
+ * port was left exactly as enable_PIT2() had programmed it.  Masking with
+ * 252 (0xFC) clears both bits.
+ * NOTE(review): bit 0 is presumably the timer 2 gate and bit 1 the speaker
+ * data enable of the PC system control port -- confirm against the chipset
+ * documentation.
+ *
+ * The parameter list is intentionally left unprototyped so that existing
+ * callers that pass a dummy argument continue to compile.
+ */
+inline static void
+disable_PIT2()
+{
+	asm volatile(
+		" inb $97,%%al \n\t"
+		" and $252,%%al \n\t"	/* clear bit 0 and bit 1 */
+		" outb %%al,$97 \n\t"
+		: : : "%al" );
+}
+
+// ctimeRDTSC() routine sets up counter 2 to count down 1/20 of a second
+// it pauses until the value is latched in the counter
+// and then reads the time stamp counter to return to the caller
+// utility routine
+// Code to calculate how many processor cycles are in a second...
+/*
+ * set_PIT2: program the timer reached through I/O ports 67 (command) and
+ * 66 (data) with the low 16 bits of value, then spin until the count can be
+ * read back.
+ *
+ * value - count to load; only DL (low byte) and DH (high byte) are used
+ */
+inline static void
+set_PIT2(int value)
+{
+// first, tell the clock we are going to write 16 bits (two bytes) to the counter
+// and enable one-shot mode (command byte 184 = 0xB8 written to port 67),
+// then write the two bytes, low byte first, into the clock register (port 66).
+// loop until the value is "realized" in the clock, this happens on the next tick:
+// each pass reads port 66 twice and compares the second byte read with the
+// high byte we wrote (DH), repeating until they match.
+// NOTE(review): only the high byte is compared, and no latch command is issued
+// before the reads -- confirm this is sufficient on the target hardware.
+//
+ asm volatile(
+ " movb $184,%%al \n\t"
+ " outb %%al,$67 \n\t"
+ " movb %%dl,%%al \n\t"
+ " outb %%al,$66 \n\t"
+ " movb %%dh,%%al \n\t"
+ " outb %%al,$66 \n"
+"1: inb $66,%%al \n\t"
+ " inb $66,%%al \n\t"
+ " cmp %%al,%%dh \n\t"
+ " jne 1b"
+ : : "d"(value) : "%al");
+}
+
+/*
+ * get_PIT2: latch the timer count, sample the time stamp counter, then read
+ * the latched 16-bit count.
+ *
+ * value  - receives the 16-bit count (low byte read first, then high byte),
+ *          zero-extended to 32 bits
+ * return - the TSC value sampled immediately after the latch command
+ */
+inline static uint64_t
+get_PIT2(unsigned int *value)
+{
+// this routine first latches the time, then gets the time stamp so we know
+// how long the read will take later. It then reads the two latched count
+// bytes from port 66 into CL and CH.
+ register uint64_t result;
+ asm volatile(
+ /* clear ECX so the upper 16 bits of *value come back as zero */
+ " xorl %%ecx,%%ecx \n\t"
+ /* command byte 128 (0x80) to port 67: latch the current count */
+ " movb $128,%%al \n\t"
+ " outb %%al,$67 \n\t"
+ /* EDX:EAX = time stamp counter */
+ " rdtsc \n\t"
+ /* AL is needed for the port reads, so park the low TSC word on the stack */
+ " pushl %%eax \n\t"
+ " inb $66,%%al \n\t"
+ " movb %%al,%%cl \n\t"
+ " inb $66,%%al \n\t"
+ " movb %%al,%%ch \n\t"
+ " popl %%eax "
+ : "=A"(result), "=c"(*value));
+ return result;
+}
+
+/*
+ * timeRDTSC: measure how many time stamp counter ticks elapse in roughly
+ * 1/20 of a second, using timer 2 as the reference, with interrupts
+ * disabled for the duration of the measurement.
+ *
+ * Steps:
+ *   1. time one get_PIT2() call (latchTime) as a fixed fudge factor;
+ *   2. load the timer with 59658 counts and note the TSC (saveTime);
+ *   3. poll the timer until the remaining count is 5 or less;
+ *   4. scale the elapsed TSC ticks by fact[remaining count] / 2000000000 to
+ *      correct for the counts that had not yet elapsed, then add latchTime.
+ *
+ * Returns the low 32 bits of the resulting tick count.
+ *
+ * Changes from the previous version:
+ *   - each 64-bit debug value is split explicitly into two 32-bit words for
+ *     the "0x%08x:0x%08x" format (high word first); previously a single
+ *     64-bit argument was passed for two conversions, which only appeared
+ *     to work because of the i386 argument layout and printed the low word
+ *     first;
+ *   - unsigned values are printed with %u instead of %d;
+ *   - disable_PIT2() is called without the stray argument;
+ *   - the poll loop condition drops the redundant "!= 0" test (a value
+ *     greater than 5 is already non-zero);
+ *   - the correction table is static const, and the narrowing on return is
+ *     explicit.
+ */
+static uint32_t
+timeRDTSC(void)
+{
+	uint64_t	latchTime;
+	uint64_t	saveTime, intermediate;
+	unsigned int	timerValue, x;
+	boolean_t	int_enabled;
+	/* correction factors indexed by the timer count remaining (0..5) when polling stops */
+	static const uint64_t fact[6] = {	2000011734ULL,
+						2000045259ULL,
+						2000078785ULL,
+						2000112312ULL,
+						2000145841ULL,
+						2000179371ULL};
+
+	int_enabled = ml_set_interrupts_enabled(FALSE);
+
+	enable_PIT2();					// turn on PIT2
+	set_PIT2(0);					// reset timer 2 to be zero
+	latchTime = rdtsc_64();				// get the time stamp to time
+	latchTime = get_PIT2(&timerValue) - latchTime;	// time how long this takes
+	set_PIT2(59658);				// set up the timer to count 1/20th a second
+	saveTime = rdtsc_64();				// now time how long a 20th a second is...
+	get_PIT2(&x);
+	/*
+	 * NOTE(review): the original wrapped the next two statements in
+	 * "do { ... } while (timerValue > x)", but x had just been assigned
+	 * timerValue, so the loop could never repeat.  The single pass is kept
+	 * here; confirm whether a real settle loop was intended.
+	 */
+	get_PIT2(&timerValue);
+	x = timerValue;
+	do {
+		intermediate = get_PIT2(&timerValue);
+		if (timerValue > x)
+			printf("Hey we are going backwards! %u, %u\n", timerValue, x);
+		x = timerValue;
+	} while (timerValue > 5);
+	printf("Timer value:%u\n", timerValue);
+	printf("intermediate 0x%08x:0x%08x\n",
+	    (unsigned int)(intermediate >> 32), (unsigned int)intermediate);
+	printf("saveTime 0x%08x:0x%08x\n",
+	    (unsigned int)(saveTime >> 32), (unsigned int)saveTime);
+
+	intermediate = intermediate - saveTime;		// raw # of tsc's it takes for about 1/20 second
+	intermediate = intermediate * fact[timerValue];	// actual time spent
+	intermediate = intermediate / 2000000000ULL;	// rescale so its exactly 1/20 a second
+	intermediate = intermediate + latchTime;	// add on our save fudge
+	set_PIT2(0);					// reset timer 2 to be zero
+	disable_PIT2();					// turn off PIT 2
+	ml_set_interrupts_enabled(int_enabled);
+	return (uint32_t)intermediate;
+}
+
+/*
+ * rdtsctime_to_nanoseconds: read the time stamp counter and scale it by
+ * rtclock.timebase_const.numer / rtclock.timebase_const.denom.
+ *
+ * A denom of 0xFFFFFFFF selects the fast path (multiply, then drop the low
+ * 32 bits); any other denom takes the full 96-bit multiply and divide.
+ */
+static uint64_t
+rdtsctime_to_nanoseconds( void )
+{
+	const uint32_t	scale_numer = rtclock.timebase_const.numer;
+	const uint32_t	scale_denom = rtclock.timebase_const.denom;
+	uint64_t	ticks = rdtsc_64();
+	uint32_t	product[3];
+
+	if (scale_denom != 0xFFFFFFFF) {
+		/* general case: 64x32 -> 96 bit product, then 96/32 divide */
+		longmul(&ticks, scale_numer, product);
+		return longdiv(product, scale_denom);
+	}
+
+	return fast_get_nano_from_abs(ticks, scale_numer);
+}
+
+/*
+ * rdtsc_to_timespec: convert the current time stamp counter reading to
+ * nanoseconds and return it split into a mach_timespec_t.
+ */
+inline static mach_timespec_t
+rdtsc_to_timespec(void)
+{
+	return nanos_to_timespec(rdtsctime_to_nanoseconds());
+}
+