X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/d7e50217d7adf6e52786a38bcaa4cd698cb9a79e..6601e61aa18bf4f09af135ff61fc7f4771d23b06:/osfmk/i386/rtclock.c?ds=sidebyside diff --git a/osfmk/i386/rtclock.c b/osfmk/i386/rtclock.c index 43ab1f3be..d6368afa6 100644 --- a/osfmk/i386/rtclock.c +++ b/osfmk/i386/rtclock.c @@ -1,24 +1,21 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * - * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. * * @APPLE_LICENSE_HEADER_END@ */ @@ -29,21 +26,26 @@ /* * File: i386/rtclock.c * Purpose: Routines for handling the machine dependent - * real-time clock. This clock is generated by - * the Intel 8254 Programmable Interval Timer. + * real-time clock. Historically, this clock is + * generated by the Intel 8254 Programmable Interval + * Timer, but local apic timers are now used for + * this purpose with the master time reference being + * the cpu clock counted by the timestamp MSR. 
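+ *		In outline (an illustrative summary of the scheme
+ *		described with the nanotime routines below), a tsc
+ *		delta is converted to time as
+ *		    nanos = (tsc_delta * scale) >> shift
+ *		with the scale/shift pair maintained for the current
+ *		cpu clock rate.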
*/ -#include #include -#include #include -#include + +#include + #include +#include #include +#include #include #include #include -#include /* HZ */ +#include #include #include #include /* for kernel_map */ @@ -51,8 +53,26 @@ #include #include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX(a,b) (((a)>(b))?(a):(b)) +#define MIN(a,b) (((a)>(b))?(b):(a)) + +#define NSEC_PER_HZ (NSEC_PER_SEC / 100) /* nsec per tick */ + +#define UI_CPUFREQ_ROUNDING_FACTOR 10000000 int sysclk_config(void); @@ -66,37 +86,16 @@ kern_return_t sysclk_getattr( clock_attr_t attr, mach_msg_type_number_t *count); -kern_return_t sysclk_setattr( - clock_flavor_t flavor, - clock_attr_t attr, - mach_msg_type_number_t count); - void sysclk_setalarm( mach_timespec_t *alarm_time); -extern void (*IOKitRegisterInterruptHook)(void *, int irq, int isclock); - -/* - * Inlines to get timestamp counter value. - */ - -static inline void rdtsc_hilo(uint32_t *hi, uint32_t *lo) { - asm volatile("rdtsc": "=a" (*lo), "=d" (*hi)); -} - -static inline uint64_t rdtsc_64(void) { - uint64_t result; - asm volatile("rdtsc": "=A" (result)); - return result; -} - /* * Lists of clock routines. */ struct clock_ops sysclk_ops = { sysclk_config, sysclk_init, sysclk_gettime, 0, - sysclk_getattr, sysclk_setattr, + sysclk_getattr, 0, sysclk_setalarm, }; @@ -107,9 +106,6 @@ int calend_init(void); kern_return_t calend_gettime( mach_timespec_t *cur_time); -kern_return_t calend_settime( - mach_timespec_t *cur_time); - kern_return_t calend_getattr( clock_flavor_t flavor, clock_attr_t attr, @@ -117,63 +113,65 @@ kern_return_t calend_getattr( struct clock_ops calend_ops = { calend_config, calend_init, - calend_gettime, calend_settime, + calend_gettime, 0, calend_getattr, 0, 0, }; /* local data declarations */ -mach_timespec_t *RtcTime = (mach_timespec_t *)0; -mach_timespec_t *RtcAlrm; -clock_res_t RtcDelt; -/* global data declarations */ -struct { - uint64_t abstime; +static clock_timer_func_t rtclock_timer_expire; + +static timer_call_data_t rtclock_alarm_timer; - mach_timespec_t time; - mach_timespec_t alarm_time; /* time of next alarm */ +static void rtclock_alarm_expire( + timer_call_param_t p0, + timer_call_param_t p1); - mach_timespec_t calend_offset; +struct { + mach_timespec_t calend_offset; boolean_t calend_is_set; - uint64_t timer_deadline; - boolean_t timer_is_set; - clock_timer_func_t timer_expire; + int64_t calend_adjtotal; + int32_t calend_adjdelta; + + uint32_t boottime; - clock_res_t new_ires; /* pending new resolution (nano ) */ - clock_res_t intr_nsec; /* interrupt resolution (nano) */ + mach_timebase_info_data_t timebase_const; decl_simple_lock_data(,lock) /* real-time clock device lock */ } rtclock; -unsigned int clknum; /* clks per second */ -unsigned int new_clknum; /* pending clknum */ -unsigned int time_per_clk; /* time per clk in ZHZ */ -unsigned int clks_per_int; /* clks per interrupt */ -unsigned int clks_per_int_99; -int rtc_intr_count; /* interrupt counter */ -int rtc_intr_hertz; /* interrupts per HZ */ -int rtc_intr_freq; /* interrupt frequency */ -int rtc_print_lost_tick; /* print lost tick */ +boolean_t rtc_initialized = FALSE; +clock_res_t rtc_intr_nsec = NSEC_PER_HZ; /* interrupt res */ +uint64_t rtc_cycle_count; /* clocks in 1/20th second */ +uint64_t rtc_cyc_per_sec; /* processor cycles per sec */ +uint32_t rtc_boot_frequency; /* provided by 1st speed-step */ +uint32_t rtc_quant_scale; /* clock to 
nanos multiplier */
+uint32_t	rtc_quant_shift;	/* clock to nanos right shift */
+uint64_t	rtc_decrementer_min;
 
-uint32_t	rtc_cyc_per_sec;	/* processor cycles per seconds */
-uint32_t	rtc_last_int_tsc_lo;	/* tsc values saved per interupt */
-uint32_t	rtc_last_int_tsc_hi;
+static	mach_timebase_info_data_t	rtc_lapic_scale; /* nsec to lapic count */
 
 /*
- * Macros to lock/unlock real-time clock device.
+ * Macros to lock/unlock real-time clock data.
  */
-#define LOCK_RTC(s)		\
-MACRO_BEGIN			\
-	(s) = splclock();	\
-	simple_lock(&rtclock.lock);	\
+#define RTC_INTRS_OFF(s)	\
+	(s) = splclock()
+
+#define RTC_INTRS_ON(s)		\
+	splx(s)
+
+#define RTC_LOCK(s)		\
+MACRO_BEGIN			\
+	RTC_INTRS_OFF(s);	\
+	simple_lock(&rtclock.lock);	\
 MACRO_END
 
-#define UNLOCK_RTC(s)		\
-MACRO_BEGIN			\
+#define RTC_UNLOCK(s)		\
+MACRO_BEGIN			\
 	simple_unlock(&rtclock.lock);	\
-	splx(s);		\
+	RTC_INTRS_ON(s);	\
 MACRO_END
 
 /*
@@ -182,114 +180,414 @@ MACRO_END
  * The i8254 is a traditional PC device with some arbitrary characteristics.
  * Basically, it is a register that counts at a fixed rate and can be
  * programmed to generate an interrupt every N counts. The count rate is
- * clknum counts per second (see pit.h), historically 1193167 we believe.
+ * clknum counts per sec (see pit.h), historically 1193167=14.318MHz/12
+ * but the more accurate value is 1193182=14.31818MHz/12. [14.31818 MHz being
+ * the master crystal oscillator reference frequency since the very first PC.]
  * Various constants are computed based on this value, and we calculate
  * them at init time for execution efficiency. To obtain sufficient
  * accuracy, some of the calculation are most easily done in floating
  * point and then converted to int.
  *
- * We want an interrupt every 10 milliseconds, approximately. The count
- * which will do that is clks_per_int. However, that many counts is not
- * *exactly* 10 milliseconds; it is a bit more or less depending on
- * roundoff. The actual time per tick is calculated and saved in
- * rtclock.intr_nsec, and it is that value which is added to the time
- * register on each tick.
- *
- * The i8254 counter can be read between interrupts in order to determine
- * the time more accurately. The counter counts down from the preset value
- * toward 0, and we have to handle the case where the counter has been
- * reset just before being read and before the interrupt has been serviced.
- * Given a count since the last interrupt, the time since then is given
- * by (count * time_per_clk). In order to minimize integer truncation,
- * we perform this calculation in an arbitrary unit of time which maintains
- * the maximum precision, i.e. such that one tick is 1.0e9 of these units,
- * or close to the precision of a 32-bit int. We then divide by this unit
- * (which doesn't lose precision) to get nanoseconds. For notation
- * purposes, this unit is defined as ZHZ = zanoseconds per nanosecond.
+ */
+
+/*
+ * Forward decl.
+ */
+
+static uint64_t	rtc_set_cyc_per_sec(uint64_t cycles);
+uint64_t	rtc_nanotime_read(void);
+
+/*
+ * create_mul_quant_GHZ
+ * create a constant used to multiply the TSC by to convert to nanoseconds.
+ * This is a 32-bit number and the TSC *MUST* have a frequency higher than
+ * 1000 MHz for this routine to work.
  *
- * This sequence to do all this is in sysclk_gettime. For efficiency, this
- * sequence also needs the value that the counter will have if it has just
- * overflowed, so we precompute that also. ALSO, certain platforms
- * (specifically the DEC XL5100) have been observed to have problem
- * with latching the counter, and they occasionally (say, one out of
- * 100,000 times) return a bogus value. Hence, the present code reads
- * the counter twice and checks for a consistent pair of values.
+ * The theory here is that we know how many TSCs-per-sec the processor runs at.
+ * Normally to convert this to nanoseconds you would multiply the current
+ * timestamp by 1000000000 (a billion) then divide by TSCs-per-sec.
+ * Unfortunately, the TSC is 64 bits, which would leave us with a 96-bit
+ * intermediate result from the multiply that must then be divided.
+ * Usually that's
+ *	uint96 = tsc * numer
+ *	nanos = uint96 / denom
+ * Instead, we create this quant constant and it becomes the numerator;
+ * the denominator can then be 0x100000000, which makes our division as simple
+ * as forgetting the lower 32 bits of the result. We can also pass this number
+ * to user space as the numer and pass 0xFFFFFFFF (RTC_FAST_DENOM) as the denom
+ * to convert raw counts to nanos. The difference is so small as to be
+ * undetectable by anything.
  *
- * Some attributes of the rt clock can be changed, including the
- * interrupt resolution. We default to the minimum resolution (10 ms),
- * but allow a finer resolution to be requested. The assumed frequency
- * of the clock can also be set since it appears that the actual
- * frequency of real-world hardware can vary from the nominal by
- * 200 ppm or more. When the frequency is set, the values above are
- * recomputed and we continue without resetting or changing anything else.
+ * Unfortunately we cannot do this for sub-GHz processors. In this case, all
+ * we do is pass the CPU speed in raw as the denom and we pass in 1000000000
+ * as the numerator. No shortcuts allowed.
  */
-#define RTC_MINRES	(NSEC_PER_SEC / HZ)	/* nsec per tick */
-#define RTC_MAXRES	(RTC_MINRES / 20)	/* nsec per tick */
-#define	ZANO		(1000000000)
-#define ZHZ		(ZANO / (NSEC_PER_SEC / HZ))
-#define READ_8254(val) { \
-	outb(PITCTL_PORT, PIT_C0); \
-	(val) = inb(PITCTR0_PORT); \
-	(val) |= inb(PITCTR0_PORT) << 8 ; }
-
+#define RTC_FAST_DENOM	0xFFFFFFFF
+inline static uint32_t
+create_mul_quant_GHZ(int shift, uint32_t quant)
+{
+	return (uint32_t)((((uint64_t)NSEC_PER_SEC/20) << shift) / quant);
+}
 /*
- * Calibration delay counts.
+ * This routine takes a value of raw TSC ticks and applies the passed mul_quant
+ * generated by create_mul_quant_GHZ(). This is our internal routine for
+ * creating nanoseconds.
+ * Since we don't really have uint96_t this routine basically does this....
+ *	uint96_t intermediate = (*value) * scale
+ *	return (intermediate >> 32)
  */
-unsigned int	delaycount = 100;
-unsigned int	microdata = 50;
+inline static uint64_t
+fast_get_nano_from_abs(uint64_t value, int scale)
+{
+	asm ("	movl	%%edx,%%esi	\n\t"
+	     "	mull	%%ecx		\n\t"
+	     "	movl	%%edx,%%edi	\n\t"
+	     "	movl	%%esi,%%eax	\n\t"
+	     "	mull	%%ecx		\n\t"
+	     "	xorl	%%ecx,%%ecx	\n\t"
+	     "	addl	%%edi,%%eax	\n\t"
+	     "	adcl	%%ecx,%%edx	"
+		: "+A" (value)
+		: "c" (scale)
+		: "%esi", "%edi");
+	return value;
+}
 
 /*
- * Forward decl.
+ * This routine basically does this...
+ * ts.tv_sec = nanos / 1000000000; create seconds + * ts.tv_nsec = nanos % 1000000000; create remainder nanos */ +inline static mach_timespec_t +nanos_to_timespec(uint64_t nanos) +{ + union { + mach_timespec_t ts; + uint64_t u64; + } ret; + ret.u64 = nanos; + asm volatile("divl %1" : "+A" (ret.u64) : "r" (NSEC_PER_SEC)); + return ret.ts; +} -extern int measure_delay(int us); -void rtc_setvals( unsigned int, clock_res_t ); +/* + * The following two routines perform the 96 bit arithmetic we need to + * convert generic absolute<->nanoseconds + * The multiply routine takes a uint64_t and a uint32_t and returns the result + * in a uint32_t[3] array. + * The divide routine takes this uint32_t[3] array and divides it by a uint32_t + * returning a uint64_t + */ +inline static void +longmul(uint64_t *abstime, uint32_t multiplicand, uint32_t *result) +{ + asm volatile( + " pushl %%ebx \n\t" + " movl %%eax,%%ebx \n\t" + " movl (%%eax),%%eax \n\t" + " mull %%ecx \n\t" + " xchg %%eax,%%ebx \n\t" + " pushl %%edx \n\t" + " movl 4(%%eax),%%eax \n\t" + " mull %%ecx \n\t" + " movl %2,%%ecx \n\t" + " movl %%ebx,(%%ecx) \n\t" + " popl %%ebx \n\t" + " addl %%ebx,%%eax \n\t" + " popl %%ebx \n\t" + " movl %%eax,4(%%ecx) \n\t" + " adcl $0,%%edx \n\t" + " movl %%edx,8(%%ecx) // and save it" + : : "a"(abstime), "c"(multiplicand), "m"(result)); + +} -static void rtc_set_cyc_per_sec(); +inline static uint64_t +longdiv(uint32_t *numer, uint32_t denom) +{ + uint64_t result; + asm volatile( + " pushl %%ebx \n\t" + " movl %%eax,%%ebx \n\t" + " movl 8(%%eax),%%edx \n\t" + " movl 4(%%eax),%%eax \n\t" + " divl %%ecx \n\t" + " xchg %%ebx,%%eax \n\t" + " movl (%%eax),%%eax \n\t" + " divl %%ecx \n\t" + " xchg %%ebx,%%edx \n\t" + " popl %%ebx \n\t" + : "=A"(result) : "a"(numer),"c"(denom)); + return result; +} /* - * Initialize non-zero clock structure values. + * Enable or disable timer 2. + * Port 0x61 controls timer 2: + * bit 0 gates the clock, + * bit 1 gates output to speaker. */ -void -rtc_setvals( - unsigned int new_clknum, - clock_res_t new_ires - ) +inline static void +enable_PIT2(void) { - unsigned int timeperclk; - unsigned int scale0; - unsigned int scale1; - unsigned int res; + asm volatile( + " inb $0x61,%%al \n\t" + " and $0xFC,%%al \n\t" + " or $1,%%al \n\t" + " outb %%al,$0x61 \n\t" + : : : "%al" ); +} - clknum = new_clknum; - rtc_intr_freq = (NSEC_PER_SEC / new_ires); - rtc_intr_hertz = rtc_intr_freq / HZ; - clks_per_int = (clknum + (rtc_intr_freq / 2)) / rtc_intr_freq; - clks_per_int_99 = clks_per_int - clks_per_int/100; +inline static void +disable_PIT2(void) +{ + asm volatile( + " inb $0x61,%%al \n\t" + " and $0xFC,%%al \n\t" + " outb %%al,$0x61 \n\t" + : : : "%al" ); +} - /* - * The following calculations are done with scaling integer operations - * in order that the integer results are accurate to the lsb. - */ - timeperclk = div_scale(ZANO, clknum, &scale0); /* 838.105647 nsec */ +inline static void +set_PIT2(int value) +{ +/* + * First, tell the clock we are going to write 16 bits to the counter + * and enable one-shot mode (command 0xB8 to port 0x43) + * Then write the two bytes into the PIT2 clock register (port 0x42). + * Loop until the value is "realized" in the clock, + * this happens on the next tick. 
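+ *
+ * Roughly equivalent C, as an illustrative sketch only:
+ *	outb(0x43, 0xB8);		// counter 2, lobyte/hibyte access
+ *	outb(0x42, value & 0xff);	// low byte
+ *	outb(0x42, value >> 8);		// high byte
+ *	do {
+ *		(void) inb(0x42);			// discard low byte
+ *	} while (inb(0x42) != (value >> 8));	// until high byte realized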
+ */
+	asm volatile(
+	    "	movb	$0xB8,%%al	\n\t"
+	    "	outb	%%al,$0x43	\n\t"
+	    "	movb	%%dl,%%al	\n\t"
+	    "	outb	%%al,$0x42	\n\t"
+	    "	movb	%%dh,%%al	\n\t"
+	    "	outb	%%al,$0x42	\n"
+	    "1:	inb	$0x42,%%al	\n\t"
+	    "	inb	$0x42,%%al	\n\t"
+	    "	cmp	%%al,%%dh	\n\t"
+	    "	jne	1b"
+	    : : "d"(value) : "%al");
+}
+
+inline static uint64_t
+get_PIT2(unsigned int *value)
+{
+	register uint64_t	result;
+/*
+ * This routine first latches the time (command 0x80 to port 0x43),
+ * then gets the time stamp so we know how long the read will take later.
+ * Read (from port 0x42) and return the current value of the timer.
+ */
+	asm volatile(
+	    "	xorl	%%ecx,%%ecx	\n\t"
+	    "	movb	$0x80,%%al	\n\t"
+	    "	outb	%%al,$0x43	\n\t"
+	    "	rdtsc			\n\t"
+	    "	pushl	%%eax		\n\t"
+	    "	inb	$0x42,%%al	\n\t"
+	    "	movb	%%al,%%cl	\n\t"
+	    "	inb	$0x42,%%al	\n\t"
+	    "	movb	%%al,%%ch	\n\t"
+	    "	popl	%%eax		"
+	    : "=A"(result), "=c"(*value));
+	return result;
+}
+
+/*
+ * timeRDTSC()
+ * This routine sets up PIT counter 2 to count down 1/20 of a second.
+ * It pauses until the value is latched in the counter
+ * and then reads the time stamp counter to return to the caller.
+ */
+static uint64_t
+timeRDTSC(void)
+{
+	int		attempts = 0;
+	uint64_t	latchTime;
+	uint64_t	saveTime,intermediate;
+	unsigned int	timerValue, lastValue;
+	boolean_t	int_enabled;
+	/*
+	 * Table of correction factors to account for
+	 *	 - timer counter quantization errors, and
+	 *	 - undercounts 0..5
+	 */
+#define SAMPLE_CLKS_EXACT	(((double) CLKNUM) / 20.0)
+#define SAMPLE_CLKS_INT		((int) CLKNUM / 20)
+#define SAMPLE_NSECS		(2000000000LL)
+#define SAMPLE_MULTIPLIER	(((double)SAMPLE_NSECS)*SAMPLE_CLKS_EXACT)
+#define ROUND64(x)		((uint64_t)((x) + 0.5))
+	uint64_t	scale[6] = {
+		ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-0)),
+		ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-1)),
+		ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-2)),
+		ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-3)),
+		ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-4)),
+		ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-5))
+	};
+
+	int_enabled = ml_set_interrupts_enabled(FALSE);
+
+restart:
+	if (attempts >= 2)
+		panic("timeRDTSC() calibration failed with %d attempts\n", attempts);
+	attempts++;
+	enable_PIT2();			// turn on PIT2
+	set_PIT2(0);			// reset timer 2 to be zero
+	latchTime = rdtsc64();		// get the time stamp to time
+	latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes
+	set_PIT2(SAMPLE_CLKS_INT);	// set up the timer for (almost) 1/20th of a second
+	saveTime = rdtsc64();		// now time how long a 20th of a second is...
+	get_PIT2(&lastValue);
+	get_PIT2(&lastValue);		// read twice, first value may be unreliable
+	do {
+		intermediate = get_PIT2(&timerValue);
+		if (timerValue > lastValue) {
+			printf("Hey we are going backwards! %u -> %u, restarting timing\n",
+			       timerValue,lastValue);
+			set_PIT2(0);
+			disable_PIT2();
+			goto restart;
+		}
+		lastValue = timerValue;
+	} while (timerValue > 5);
+	kprintf("timerValue %d\n",timerValue);
+	kprintf("intermediate 0x%016llx\n",intermediate);
+	kprintf("saveTime 0x%016llx\n",saveTime);
+
+	intermediate -= saveTime;		// raw count for about 1/20 second
+	intermediate *= scale[timerValue];	// rescale measured time spent
+	intermediate /= SAMPLE_NSECS;		// so it's exactly 1/20 of a second
+	intermediate += latchTime;		// add on our saved fudge
+
+	set_PIT2(0);			// reset timer 2 to be zero
+	disable_PIT2();			// turn off PIT 2
+
+	ml_set_interrupts_enabled(int_enabled);
+	return intermediate;
+}
+
+static uint64_t
+tsc_to_nanoseconds(uint64_t abstime)
+{
+	uint32_t	numer;
+	uint32_t	denom;
+	uint32_t	intermediate[3];
+
+	numer = rtclock.timebase_const.numer;
+	denom = rtclock.timebase_const.denom;
+	if (denom == RTC_FAST_DENOM) {
+		abstime = fast_get_nano_from_abs(abstime, numer);
+	} else {
+		longmul(&abstime, numer, intermediate);
+		abstime = longdiv(intermediate, denom);
+	}
+	return abstime;
+}
+
+inline static mach_timespec_t
+tsc_to_timespec(void)
+{
+	uint64_t	currNanos;
+	currNanos = rtc_nanotime_read();
+	return nanos_to_timespec(currNanos);
+}
+
+#define DECREMENTER_MAX UINT_MAX
+static uint32_t
+deadline_to_decrementer(
+	uint64_t	deadline,
+	uint64_t	now)
+{
+	uint64_t	delta;
+
+	if (deadline <= now)
+		return rtc_decrementer_min;
+	else {
+		delta = deadline - now;
+		return MIN(MAX(rtc_decrementer_min,delta),DECREMENTER_MAX);
+	}
+}
+
+static inline uint64_t
+lapic_time_countdown(uint32_t initial_count)
+{
+	boolean_t		state;
+	uint64_t		start_time;
+	uint64_t		stop_time;
+	lapic_timer_count_t	count;
+
+	state = ml_set_interrupts_enabled(FALSE);
+	lapic_set_timer(FALSE, one_shot, divide_by_1, initial_count);
+	start_time = rdtsc64();
+	do {
+		lapic_get_timer(NULL, NULL, NULL, &count);
+	} while (count > 0);
+	stop_time = rdtsc64();
+	ml_set_interrupts_enabled(state);
+
+	return tsc_to_nanoseconds(stop_time - start_time);
+}
+
+static void
+rtc_lapic_timer_calibrate(void)
+{
+	uint32_t	nsecs;
+	uint64_t	countdown;
+
+	if (!(cpuid_features() & CPUID_FEATURE_APIC))
+		return;
+
+	/*
+	 * Set the local apic timer counting down to zero without an interrupt.
+	 * Use the timestamp to calculate how long this takes.
+	 */
+	nsecs = (uint32_t) lapic_time_countdown(rtc_intr_nsec);
+
+	/*
+	 * Compute a countdown ratio for a given time in nanoseconds.
+	 * That is, countdown = time * numer / denom.
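+	 * For example (illustrative numbers only): if an initial count of
+	 * rtc_intr_nsec = 10000000 (10 ms) drains in a measured 7500000 ns,
+	 * then a 10 ms interval needs
+	 * 10000000 * 10000000 / 7500000 = 13333333 lapic ticks.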
+	 */
+	countdown = (uint64_t)rtc_intr_nsec * (uint64_t)rtc_intr_nsec / nsecs;
+
+	nsecs = (uint32_t) lapic_time_countdown((uint32_t) countdown);
+
+	rtc_lapic_scale.numer = countdown;
+	rtc_lapic_scale.denom = nsecs;
+
+	kprintf("rtc_lapic_timer_calibrate() scale: %d/%d\n",
+		(uint32_t) countdown, nsecs);
+}
+
+static void
+rtc_lapic_set_timer(
+	uint32_t interval)
+{
+	uint64_t	count;
+
+	assert(rtc_lapic_scale.denom);
+
+	count = interval * (uint64_t) rtc_lapic_scale.numer;
+	count /= rtc_lapic_scale.denom;
+
+	lapic_set_timer(TRUE, one_shot, divide_by_1, (uint32_t) count);
+}
+
+static void
+rtc_lapic_start_ticking(void)
+{
+	uint64_t	abstime;
+	uint64_t	first_tick;
+	uint64_t	decr;
+
+	abstime = mach_absolute_time();
+	first_tick = abstime + NSEC_PER_HZ;
+	current_cpu_datap()->cpu_rtc_tick_deadline = first_tick;
+	decr = deadline_to_decrementer(first_tick, abstime);
+	rtc_lapic_set_timer(decr);
+}
 
 /*
@@ -300,267 +598,415 @@ rtc_setvals(
 int
 sysclk_config(void)
 {
-	int 	RtcFlag;
-	int	pic;
-#if	NCPUS > 1
 	mp_disable_preemption();
 	if (cpu_number() != master_cpu) {
 		mp_enable_preemption();
 		return(1);
 	}
 	mp_enable_preemption();
-#endif
+
+	timer_call_setup(&rtclock_alarm_timer, rtclock_alarm_expire, NULL);
+
+	simple_lock_init(&rtclock.lock, 0);
+
+	return (1);
+}
+
+
+/*
+ * Nanotime/mach_absolute_time
+ * ---------------------------
+ * The timestamp counter (tsc) - which counts cpu clock cycles and can be read
+ * efficiently by the kernel and in userspace - is the reference for all timing.
+ * However, the cpu clock rate is not only platform-dependent but can change
+ * (speed-step) dynamically. Hence tsc is converted into nanoseconds which is
+ * identical to mach_absolute_time. The conversion of tsc to nanoseconds is
+ * encapsulated by nanotime.
+ *
+ * The kernel maintains nanotime information recording:
+ *	- the current ratio of tsc to nanoseconds
+ *	  with this ratio expressed as a 32-bit scale and shift
+ *	  (power of 2 divider);
+ *	- the tsc (step_tsc) and nanotime (step_ns) at which the current
+ *	  ratio (clock speed) began.
+ * So a tsc value can be converted to nanotime by:
+ *
+ *	nanotime = (((tsc - step_tsc)*scale) >> shift) + step_ns
+ *
+ * In general, (tsc - step_tsc) is a 64-bit quantity with the scaling
+ * involving a 96-bit intermediate value. However, by saving the converted
+ * values at each tick (or at any intervening speed-step) - base_tsc and
+ * base_ns - we can perform conversions relative to these and be assured that
+ * (tsc - base_tsc) is 32 bits. Hence:
+ *
+ *	fast_nanotime = (((tsc - base_tsc)*scale) >> shift) + base_ns
+ *
+ * The tuple {base_tsc, base_ns, scale, shift} is exported in the commpage
+ * for the userspace nanotime routine to read. A duplicate check_tsc is
+ * appended so that the consistency of the read can be verified. Note that
+ * this scheme is essential for MP systems in which the commpage is updated
+ * by the master cpu but may be read concurrently by other cpus.
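+ *
+ * A reader can therefore detect a racing update and retry; in sketch
+ * (field names illustrative):
+ *	do {
+ *		tsc = nt_base_tsc;
+ *		read nt_base_ns, nt_scale and nt_shift;
+ *	} while (tsc != nt_check_tsc);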
+ *
+ */
+static inline void
+rtc_nanotime_set_commpage(rtc_nanotime_t *rntp)
+{
+	commpage_nanotime_t	cp_nanotime;
+
+	/* Only the master cpu updates the commpage */
+	if (cpu_number() != master_cpu)
+		return;
+
+	cp_nanotime.nt_base_tsc = rntp->rnt_tsc;
+	cp_nanotime.nt_base_ns = rntp->rnt_nanos;
+	cp_nanotime.nt_scale = rntp->rnt_scale;
+	cp_nanotime.nt_shift = rntp->rnt_shift;
+
+	commpage_set_nanotime(&cp_nanotime);
+}
+
+static void
+rtc_nanotime_init(void)
+{
+	rtc_nanotime_t	*rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+	rtc_nanotime_t	*master_rntp = &cpu_datap(master_cpu)->cpu_rtc_nanotime;
+
+	if (cpu_number() == master_cpu) {
+		rntp->rnt_tsc = rdtsc64();
+		rntp->rnt_nanos = tsc_to_nanoseconds(rntp->rnt_tsc);
+		rntp->rnt_scale = rtc_quant_scale;
+		rntp->rnt_shift = rtc_quant_shift;
+		rntp->rnt_step_tsc = 0ULL;
+		rntp->rnt_step_nanos = 0ULL;
+	} else {
+		/*
+		 * Copy master processor's nanotime info.
+		 * Loop required in case this changes while copying.
+		 */
+		do {
+			*rntp = *master_rntp;
+		} while (rntp->rnt_tsc != master_rntp->rnt_tsc);
+	}
+}
+
+static inline void
+_rtc_nanotime_update(rtc_nanotime_t *rntp, uint64_t tsc)
+{
+	uint64_t	tsc_delta;
+	uint64_t	ns_delta;
+
+	tsc_delta = tsc - rntp->rnt_step_tsc;
+	ns_delta = tsc_to_nanoseconds(tsc_delta);
+	rntp->rnt_nanos = rntp->rnt_step_nanos + ns_delta;
+	rntp->rnt_tsc = tsc;
+}
+
+static void
+rtc_nanotime_update(void)
+{
+	rtc_nanotime_t	*rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+
+	assert(get_preemption_level() > 0);
+	assert(!ml_get_interrupts_enabled());
+
+	_rtc_nanotime_update(rntp, rdtsc64());
+	rtc_nanotime_set_commpage(rntp);
+}
+
+static void
+rtc_nanotime_scale_update(void)
+{
+	rtc_nanotime_t	*rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+	uint64_t	tsc = rdtsc64();
+
+	assert(!ml_get_interrupts_enabled());
+
 	/*
-	 * Setup device.
+	 * Update time based on past scale.
 	 */
-#if	MP_V1_1
-	{
-		extern boolean_t	mp_v1_1_initialized;
-		if (mp_v1_1_initialized)
-			pic = 2;
-		else
-			pic = 0;
-	}
-#else
-	pic = 0;	/* FIXME .. interrupt registration moved to AppleIntelClock */
-#endif
+	_rtc_nanotime_update(rntp, tsc);
 
+	/*
+	 * Update scale and timestamp this update.
+	 */
+	rntp->rnt_scale = rtc_quant_scale;
+	rntp->rnt_shift = rtc_quant_shift;
+	rntp->rnt_step_tsc = rntp->rnt_tsc;
+	rntp->rnt_step_nanos = rntp->rnt_nanos;
+
+	/* Export update to userland */
+	rtc_nanotime_set_commpage(rntp);
+}
+
+static uint64_t
+_rtc_nanotime_read(void)
+{
+	rtc_nanotime_t	*rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+	uint64_t	rnt_tsc;
+	uint32_t	rnt_scale;
+	uint32_t	rnt_shift;
+	uint64_t	rnt_nanos;
+	uint64_t	tsc;
+	uint64_t	tsc_delta;
+
+	rnt_scale = rntp->rnt_scale;
+	if (rnt_scale == 0)
+		return 0ULL;
+
+	rnt_shift = rntp->rnt_shift;
+	rnt_nanos = rntp->rnt_nanos;
+	rnt_tsc = rntp->rnt_tsc;
+	tsc = rdtsc64();
+
+	tsc_delta = tsc - rnt_tsc;
+	if ((tsc_delta >> 32) != 0)
+		return rnt_nanos + tsc_to_nanoseconds(tsc_delta);
+
+	/* Let the compiler optimize(?): */
+	if (rnt_shift == 32)
+		return rnt_nanos + ((tsc_delta * rnt_scale) >> 32);
+	else
+		return rnt_nanos + ((tsc_delta * rnt_scale) >> rnt_shift);
+}
+
+uint64_t
+rtc_nanotime_read(void)
+{
+	uint64_t	result;
+	uint64_t	rnt_tsc;
+	rtc_nanotime_t	*rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+
+	/*
+	 * Use the timestamp to ensure the uptime record isn't changed.
+	 * This avoids disabling interrupts.
+	 * And note this is a per-cpu structure, hence no locking.
+	 */
+	do {
+		rnt_tsc = rntp->rnt_tsc;
+		result = _rtc_nanotime_read();
+	} while (rnt_tsc != rntp->rnt_tsc);
+
+	return result;
+}
+
+
+/*
+ * This function is called by the speed-step driver when a
+ * change of cpu clock frequency is about to occur.
+ * The scale is not changed until rtc_clock_stepped() is called.
+ * Between these times there is an uncertainty as to exactly when
+ * the change takes effect. FIXME: by using another timing source
+ * we could eliminate this error.
+ */
+void
+rtc_clock_stepping(__unused uint32_t new_frequency,
+		   __unused uint32_t old_frequency)
+{
+	boolean_t	istate;
+
+	istate = ml_set_interrupts_enabled(FALSE);
+	rtc_nanotime_scale_update();
+	ml_set_interrupts_enabled(istate);
+}
+
+/*
+ * This function is called by the speed-step driver when a
+ * change of cpu clock frequency has just occurred. This change
+ * is expressed as a ratio relative to the boot clock rate.
+ */
+void
+rtc_clock_stepped(uint32_t new_frequency, uint32_t old_frequency)
+{
+	boolean_t	istate;
+
+	istate = ml_set_interrupts_enabled(FALSE);
+	if (rtc_boot_frequency == 0) {
+		/*
+		 * At the first ever stepping, old frequency is the real
+		 * initial clock rate. This step and all others are based
+		 * relative to this initial frequency at which the tsc
+		 * calibration was made. Hence we must remember this base
+		 * frequency as reference.
+		 */
+		rtc_boot_frequency = old_frequency;
+	}
+	rtc_set_cyc_per_sec(rtc_cycle_count * new_frequency /
+				rtc_boot_frequency);
+	rtc_nanotime_scale_update();
+	ml_set_interrupts_enabled(istate);
+}
+
+/*
+ * rtc_sleep_wakeup() is called from ACPI on awakening from S3 sleep
+ */
+void
+rtc_sleep_wakeup(void)
+{
+	rtc_nanotime_t	*rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+
+	boolean_t	istate;
+
+	istate = ml_set_interrupts_enabled(FALSE);
 
 	/*
-	 * We should attempt to test the real-time clock
-	 * device here. If it were to fail, we should panic
-	 * the system.
+	 * Reset nanotime.
+	 * The timestamp counter will have been reset
+	 * but nanotime (uptime) marches onward.
+	 * We assume that we're still at the former cpu frequency.
 	 */
-	RtcFlag = /* test device */1;
-	printf("realtime clock configured\n");
+	rntp->rnt_tsc = rdtsc64();
+	rntp->rnt_step_tsc = 0ULL;
+	rntp->rnt_step_nanos = rntp->rnt_nanos;
+	rtc_nanotime_set_commpage(rntp);
+
+	/* Restart tick interrupts from the LAPIC timer */
+	rtc_lapic_start_ticking();
 
-	simple_lock_init(&rtclock.lock, ETAP_NO_TRACE);
-	return (RtcFlag);
+	ml_set_interrupts_enabled(istate);
 }
 
 /*
- * Initialize the real-time clock device. Return success (1)
- * or failure (0). Since the real-time clock is required to
- * provide canonical mapped time, we allocate a page to keep
- * the clock time value. In addition, various variables used
- * to support the clock are initialized. Note: the clock is
- * not started until rtclock_reset is called.
+ * Initialize the real-time clock device.
+ * In addition, various variables used to support the clock are initialized.
  */
 int
 sysclk_init(void)
 {
-	vm_offset_t *vp;
-#if	NCPUS > 1
+	uint64_t	cycles;
+
 	mp_disable_preemption();
-	if (cpu_number() != master_cpu) {
-		mp_enable_preemption();
-		return(1);
+	if (cpu_number() == master_cpu) {
+		/*
+		 * Perform calibration.
+		 * The PIT is used as the reference to compute how many
+		 * TSC counts (cpu clock cycles) occur per second.
+		 */
+		rtc_cycle_count = timeRDTSC();
+		cycles = rtc_set_cyc_per_sec(rtc_cycle_count);
+
+		/*
+		 * Set min/max to actual.
+		 * ACPI may update these later if speed-stepping is detected.
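+		 * (For example, a part calibrating at 1995 MHz starts out
+		 * reporting min = max = that measured frequency; the numbers
+		 * here are illustrative only.)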
+ */ + gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles; + gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles; + printf("[RTCLOCK] frequency %llu (%llu)\n", + cycles, rtc_cyc_per_sec); + + rtc_lapic_timer_calibrate(); + + /* Minimum interval is 1usec */ + rtc_decrementer_min = deadline_to_decrementer(NSEC_PER_USEC, + 0ULL); + /* Point LAPIC interrupts to hardclock() */ + lapic_set_timer_func((i386_intr_func_t) rtclock_intr); + + clock_timebase_init(); + rtc_initialized = TRUE; } + + rtc_nanotime_init(); + + rtc_lapic_start_ticking(); + mp_enable_preemption(); -#endif - RtcTime = &rtclock.time; - rtc_setvals( CLKNUM, RTC_MINRES ); /* compute constants */ - rtc_set_cyc_per_sec(); /* compute number of tsc beats per second */ return (1); } -static volatile unsigned int last_ival = 0; - /* * Get the clock device time. This routine is responsible * for converting the device's machine dependent time value * into a canonical mach_timespec_t value. */ -kern_return_t -sysclk_gettime( +static kern_return_t +sysclk_gettime_internal( mach_timespec_t *cur_time) /* OUT */ { - mach_timespec_t itime = {0, 0}; - unsigned int val, val2; - int s; - - if (!RtcTime) { - /* Uninitialized */ - cur_time->tv_nsec = 0; - cur_time->tv_sec = 0; - return (KERN_SUCCESS); - } - - /* - * Inhibit interrupts. Determine the incremental - * time since the last interrupt. (This could be - * done in assembler for a bit more speed). - */ - LOCK_RTC(s); - do { - READ_8254(val); /* read clock */ - READ_8254(val2); /* read clock */ - } while ( val2 > val || val2 < val - 10 ); - if ( val > clks_per_int_99 ) { - outb( 0x0a, 0x20 ); /* see if interrupt pending */ - if ( inb( 0x20 ) & 1 ) - itime.tv_nsec = rtclock.intr_nsec; /* yes, add a tick */ - } - itime.tv_nsec += ((clks_per_int - val) * time_per_clk) / ZHZ; - if ( itime.tv_nsec < last_ival ) { - if (rtc_print_lost_tick) - printf( "rtclock: missed clock interrupt.\n" ); - } - last_ival = itime.tv_nsec; - cur_time->tv_sec = rtclock.time.tv_sec; - cur_time->tv_nsec = rtclock.time.tv_nsec; - UNLOCK_RTC(s); - ADD_MACH_TIMESPEC(cur_time, ((mach_timespec_t *)&itime)); + *cur_time = tsc_to_timespec(); return (KERN_SUCCESS); } kern_return_t -sysclk_gettime_internal( +sysclk_gettime( mach_timespec_t *cur_time) /* OUT */ { - mach_timespec_t itime = {0, 0}; - unsigned int val, val2; - - if (!RtcTime) { - /* Uninitialized */ - cur_time->tv_nsec = 0; - cur_time->tv_sec = 0; - return (KERN_SUCCESS); - } - - /* - * Inhibit interrupts. Determine the incremental - * time since the last interrupt. (This could be - * done in assembler for a bit more speed). - */ - do { - READ_8254(val); /* read clock */ - READ_8254(val2); /* read clock */ - } while ( val2 > val || val2 < val - 10 ); - if ( val > clks_per_int_99 ) { - outb( 0x0a, 0x20 ); /* see if interrupt pending */ - if ( inb( 0x20 ) & 1 ) - itime.tv_nsec = rtclock.intr_nsec; /* yes, add a tick */ - } - itime.tv_nsec += ((clks_per_int - val) * time_per_clk) / ZHZ; - if ( itime.tv_nsec < last_ival ) { - if (rtc_print_lost_tick) - printf( "rtclock: missed clock interrupt.\n" ); - } - last_ival = itime.tv_nsec; - cur_time->tv_sec = rtclock.time.tv_sec; - cur_time->tv_nsec = rtclock.time.tv_nsec; - ADD_MACH_TIMESPEC(cur_time, ((mach_timespec_t *)&itime)); - return (KERN_SUCCESS); + return sysclk_gettime_internal(cur_time); } -/* - * Get the clock device time when ALL interrupts are already disabled. - * Same as above except for turning interrupts off and on. 
- * This routine is responsible for converting the device's machine dependent - * time value into a canonical mach_timespec_t value. - */ void sysclk_gettime_interrupts_disabled( mach_timespec_t *cur_time) /* OUT */ { - mach_timespec_t itime = {0, 0}; - unsigned int val; + (void) sysclk_gettime_internal(cur_time); +} - if (!RtcTime) { - /* Uninitialized */ - cur_time->tv_nsec = 0; - cur_time->tv_sec = 0; - return; - } +// utility routine +// Code to calculate how many processor cycles are in a second... - simple_lock(&rtclock.lock); +static uint64_t +rtc_set_cyc_per_sec(uint64_t cycles) +{ + + if (cycles > (NSEC_PER_SEC/20)) { + // we can use just a "fast" multiply to get nanos + rtc_quant_shift = 32; + rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, cycles); + rtclock.timebase_const.numer = rtc_quant_scale; // timeRDTSC is 1/20 + rtclock.timebase_const.denom = RTC_FAST_DENOM; + } else { + rtc_quant_shift = 26; + rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, cycles); + rtclock.timebase_const.numer = NSEC_PER_SEC/20; // timeRDTSC is 1/20 + rtclock.timebase_const.denom = cycles; + } + rtc_cyc_per_sec = cycles*20; // multiply it by 20 and we are done.. + // BUT we also want to calculate... + + cycles = ((rtc_cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR/2)) + / UI_CPUFREQ_ROUNDING_FACTOR) + * UI_CPUFREQ_ROUNDING_FACTOR; /* - * Copy the current time knowing that we cant be interrupted - * between the two longwords and so dont need to use MTS_TO_TS + * Set current measured speed. */ - READ_8254(val); /* read clock */ - if ( val > clks_per_int_99 ) { - outb( 0x0a, 0x20 ); /* see if interrupt pending */ - if ( inb( 0x20 ) & 1 ) - itime.tv_nsec = rtclock.intr_nsec; /* yes, add a tick */ - } - itime.tv_nsec += ((clks_per_int - val) * time_per_clk) / ZHZ; - if ( itime.tv_nsec < last_ival ) { - if (rtc_print_lost_tick) - printf( "rtclock: missed clock interrupt.\n" ); - } - last_ival = itime.tv_nsec; - cur_time->tv_sec = rtclock.time.tv_sec; - cur_time->tv_nsec = rtclock.time.tv_nsec; - ADD_MACH_TIMESPEC(cur_time, ((mach_timespec_t *)&itime)); + if (cycles >= 0x100000000ULL) { + gPEClockFrequencyInfo.cpu_clock_rate_hz = 0xFFFFFFFFUL; + } else { + gPEClockFrequencyInfo.cpu_clock_rate_hz = (unsigned long)cycles; + } + gPEClockFrequencyInfo.cpu_frequency_hz = cycles; - simple_unlock(&rtclock.lock); + kprintf("[RTCLOCK] frequency %llu (%llu)\n", cycles, rtc_cyc_per_sec); + return(cycles); } -// utility routine -// Code to calculate how many processor cycles are in a second... - -static void -rtc_set_cyc_per_sec() +void +clock_get_system_microtime( + uint32_t *secs, + uint32_t *microsecs) { + mach_timespec_t now; - int x, y; - uint64_t cycles; - uint32_t c[15]; // array for holding sampled cycle counts - mach_timespec_t tst[15]; // array for holding time values. NOTE for some reason tv_sec not work + (void) sysclk_gettime_internal(&now); - for (x=0; x<15; x++) { // quick sample 15 times - tst[x].tv_sec = 0; - tst[x].tv_nsec = 0; - sysclk_gettime_internal(&tst[x]); - rdtsc_hilo(&y, &c[x]); - } - y = 0; - cycles = 0; - for (x=0; x<14; x++) { - // simple formula really. calculate the numerator as the number of elapsed processor - // cycles * 1000 to adjust for the resolution we want. The denominator is the - // elapsed "real" time in nano-seconds. The result will be the processor speed in - // Mhz. 
any overflows will be discarded before they are added - if ((c[x+1] > c[x]) && (tst[x+1].tv_nsec > tst[x].tv_nsec)) { - cycles += ((uint64_t)(c[x+1]-c[x]) * NSEC_PER_SEC ) / (uint64_t)(tst[x+1].tv_nsec - tst[x].tv_nsec); // elapsed nsecs - y +=1; - } - } - if (y>0) { // we got more than 1 valid sample. This also takes care of the case of if the clock isn't running - cycles = cycles / y; // calc our average - } - rtc_cyc_per_sec = cycles; - rdtsc_hilo(&rtc_last_int_tsc_hi, &rtc_last_int_tsc_lo); + *secs = now.tv_sec; + *microsecs = now.tv_nsec / NSEC_PER_USEC; } -static -natural_t -get_uptime_cycles(void) +void +clock_get_system_nanotime( + uint32_t *secs, + uint32_t *nanosecs) { - // get the time since the last interupt based on the processors TSC ignoring the - // RTC for speed - - uint32_t a,d,intermediate_lo,intermediate_hi,result; - uint64_t newTime; - - rdtsc_hilo(&d, &a); - if (d != rtc_last_int_tsc_hi) { - newTime = d-rtc_last_int_tsc_hi; - newTime = (newTime<<32) + (a-rtc_last_int_tsc_lo); - result = newTime; - } else { - result = a-rtc_last_int_tsc_lo; - } - __asm__ volatile ( " mul %3 ": "=eax" (intermediate_lo), "=edx" (intermediate_hi): "a"(result), "d"(NSEC_PER_SEC) ); - __asm__ volatile ( " div %3": "=eax" (result): "eax"(intermediate_lo), "edx" (intermediate_hi), "ecx" (rtc_cyc_per_sec) ); - return result; -} + mach_timespec_t now; + + (void) sysclk_gettime_internal(&now); + *secs = now.tv_sec; + *nanosecs = now.tv_nsec; +} /* * Get clock device attributes. @@ -571,31 +1017,18 @@ sysclk_getattr( clock_attr_t attr, /* OUT */ mach_msg_type_number_t *count) /* IN/OUT */ { - spl_t s; - if (*count != 1) return (KERN_FAILURE); switch (flavor) { case CLOCK_GET_TIME_RES: /* >0 res */ -#if (NCPUS == 1 || (MP_V1_1 && 0)) - LOCK_RTC(s); - *(clock_res_t *) attr = 1000; - UNLOCK_RTC(s); - break; -#endif /* (NCPUS == 1 || (MP_V1_1 && 0)) && AT386 */ - case CLOCK_ALARM_CURRES: /* =0 no alarm */ - LOCK_RTC(s); - *(clock_res_t *) attr = rtclock.intr_nsec; - UNLOCK_RTC(s); + *(clock_res_t *) attr = rtc_intr_nsec; break; + case CLOCK_ALARM_CURRES: /* =0 no alarm */ case CLOCK_ALARM_MAXRES: - *(clock_res_t *) attr = RTC_MAXRES; - break; - case CLOCK_ALARM_MINRES: - *(clock_res_t *) attr = RTC_MINRES; + *(clock_res_t *) attr = 0; break; default: @@ -604,60 +1037,6 @@ sysclk_getattr( return (KERN_SUCCESS); } -/* - * Set clock device attributes. - */ -kern_return_t -sysclk_setattr( - clock_flavor_t flavor, - clock_attr_t attr, /* IN */ - mach_msg_type_number_t count) /* IN */ -{ - spl_t s; - int freq; - int adj; - clock_res_t new_ires; - - if (count != 1) - return (KERN_FAILURE); - switch (flavor) { - - case CLOCK_GET_TIME_RES: - case CLOCK_ALARM_MAXRES: - case CLOCK_ALARM_MINRES: - return (KERN_FAILURE); - - case CLOCK_ALARM_CURRES: - new_ires = *(clock_res_t *) attr; - - /* - * The new resolution must be within the predetermined - * range. If the desired resolution cannot be achieved - * to within 0.1%, an error is returned. - */ - if (new_ires < RTC_MAXRES || new_ires > RTC_MINRES) - return (KERN_INVALID_VALUE); - freq = (NSEC_PER_SEC / new_ires); - adj = (((clknum % freq) * new_ires) / clknum); - if (adj > (new_ires / 1000)) - return (KERN_INVALID_VALUE); - /* - * Record the new alarm resolution which will take effect - * on the next HZ aligned clock tick. 
- */ - LOCK_RTC(s); - if ( freq != rtc_intr_freq ) { - rtclock.new_ires = new_ires; - new_clknum = clknum; - } - UNLOCK_RTC(s); - return (KERN_SUCCESS); - - default: - return (KERN_INVALID_VALUE); - } -} - /* * Set next alarm time for the clock device. This call * always resets the time to deliver an alarm for the @@ -667,12 +1046,9 @@ void sysclk_setalarm( mach_timespec_t *alarm_time) { - spl_t s; - - LOCK_RTC(s); - rtclock.alarm_time = *alarm_time; - RtcAlrm = &rtclock.alarm_time; - UNLOCK_RTC(s); + timer_call_enter(&rtclock_alarm_timer, + (uint64_t) alarm_time->tv_sec * NSEC_PER_SEC + + alarm_time->tv_nsec); } /* @@ -702,39 +1078,69 @@ calend_gettime( { spl_t s; - LOCK_RTC(s); + RTC_LOCK(s); if (!rtclock.calend_is_set) { - UNLOCK_RTC(s); + RTC_UNLOCK(s); return (KERN_FAILURE); } (void) sysclk_gettime_internal(cur_time); ADD_MACH_TIMESPEC(cur_time, &rtclock.calend_offset); - UNLOCK_RTC(s); + RTC_UNLOCK(s); return (KERN_SUCCESS); } -/* - * Set the current clock time. - */ -kern_return_t -calend_settime( - mach_timespec_t *new_time) +void +clock_get_calendar_microtime( + uint32_t *secs, + uint32_t *microsecs) +{ + mach_timespec_t now; + + calend_gettime(&now); + + *secs = now.tv_sec; + *microsecs = now.tv_nsec / NSEC_PER_USEC; +} + +void +clock_get_calendar_nanotime( + uint32_t *secs, + uint32_t *nanosecs) { - mach_timespec_t curr_time; + mach_timespec_t now; + + calend_gettime(&now); + + *secs = now.tv_sec; + *nanosecs = now.tv_nsec; +} + +void +clock_set_calendar_microtime( + uint32_t secs, + uint32_t microsecs) +{ + mach_timespec_t new_time, curr_time; + uint32_t old_offset; spl_t s; - LOCK_RTC(s); + new_time.tv_sec = secs; + new_time.tv_nsec = microsecs * NSEC_PER_USEC; + + RTC_LOCK(s); + old_offset = rtclock.calend_offset.tv_sec; (void) sysclk_gettime_internal(&curr_time); - rtclock.calend_offset = *new_time; + rtclock.calend_offset = new_time; SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time); + rtclock.boottime += rtclock.calend_offset.tv_sec - old_offset; rtclock.calend_is_set = TRUE; - UNLOCK_RTC(s); + RTC_UNLOCK(s); - (void) bbc_settime(new_time); + (void) bbc_settime(&new_time); - return (KERN_SUCCESS); + host_notify_calendar_change(); } /* @@ -746,24 +1152,13 @@ calend_getattr( clock_attr_t attr, /* OUT */ mach_msg_type_number_t *count) /* IN/OUT */ { - spl_t s; - if (*count != 1) return (KERN_FAILURE); switch (flavor) { case CLOCK_GET_TIME_RES: /* >0 res */ -#if (NCPUS == 1 || (MP_V1_1 && 0)) - LOCK_RTC(s); - *(clock_res_t *) attr = 1000; - UNLOCK_RTC(s); - break; -#else /* (NCPUS == 1 || (MP_V1_1 && 0)) && AT386 */ - LOCK_RTC(s); - *(clock_res_t *) attr = rtclock.intr_nsec; - UNLOCK_RTC(s); + *(clock_res_t *) attr = rtc_intr_nsec; break; -#endif /* (NCPUS == 1 || (MP_V1_1 && 0)) && AT386 */ case CLOCK_ALARM_CURRES: /* =0 no alarm */ case CLOCK_ALARM_MINRES: @@ -777,16 +1172,89 @@ calend_getattr( return (KERN_SUCCESS); } -void -clock_adjust_calendar( - clock_res_t nsec) +#define tickadj (40*NSEC_PER_USEC) /* "standard" skew, ns / tick */ +#define bigadj (NSEC_PER_SEC) /* use 10x skew above bigadj ns */ + +uint32_t +clock_set_calendar_adjtime( + int32_t *secs, + int32_t *microsecs) { - spl_t s; + int64_t total, ototal; + uint32_t interval = 0; + spl_t s; + + total = (int64_t)*secs * NSEC_PER_SEC + *microsecs * NSEC_PER_USEC; + + RTC_LOCK(s); + ototal = rtclock.calend_adjtotal; - LOCK_RTC(s); - if (rtclock.calend_is_set) - ADD_MACH_TIMESPEC_NSEC(&rtclock.calend_offset, nsec); - UNLOCK_RTC(s); + if (total != 0) { + int32_t delta = tickadj; + + if (total > 0) { + if (total > 
bigadj)
+				delta *= 10;
+			if (delta > total)
+				delta = total;
+		}
+		else {
+			if (total < -bigadj)
+				delta *= 10;
+			delta = -delta;
+			if (delta < total)
+				delta = total;
+		}
+
+		rtclock.calend_adjtotal = total;
+		rtclock.calend_adjdelta = delta;
+
+		interval = NSEC_PER_HZ;
+	}
+	else
+		rtclock.calend_adjdelta = rtclock.calend_adjtotal = 0;
+
+	RTC_UNLOCK(s);
+
+	if (ototal == 0)
+		*secs = *microsecs = 0;
+	else {
+		*secs = ototal / NSEC_PER_SEC;
+		*microsecs = (ototal % NSEC_PER_SEC) / NSEC_PER_USEC;
+	}
+
+	return (interval);
+}
+
+uint32_t
+clock_adjust_calendar(void)
+{
+	uint32_t	interval = 0;
+	int32_t		delta;
+	spl_t		s;
+
+	RTC_LOCK(s);
+	delta = rtclock.calend_adjdelta;
+	ADD_MACH_TIMESPEC_NSEC(&rtclock.calend_offset, delta);
+
+	rtclock.calend_adjtotal -= delta;
+
+	if (delta > 0) {
+		if (delta > rtclock.calend_adjtotal)
+			rtclock.calend_adjdelta = rtclock.calend_adjtotal;
+	}
+	else
+	if (delta < 0) {
+		if (delta < rtclock.calend_adjtotal)
+			rtclock.calend_adjdelta = rtclock.calend_adjtotal;
+	}
+
+	if (rtclock.calend_adjdelta != 0)
+		interval = NSEC_PER_HZ;
+
+	RTC_UNLOCK(s);
+
+	return (interval);
 }
 
 void
@@ -798,198 +1266,176 @@ clock_initialize_calendar(void)
 	if (bbc_gettime(&bbc_time) != KERN_SUCCESS)
 		return;
 
-	LOCK_RTC(s);
-	if (!rtclock.calend_is_set) {
-		(void) sysclk_gettime_internal(&curr_time);
-		rtclock.calend_offset = bbc_time;
-		SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time);
-		rtclock.calend_is_set = TRUE;
-	}
-	UNLOCK_RTC(s);
+	RTC_LOCK(s);
+	if (rtclock.boottime == 0)
+		rtclock.boottime = bbc_time.tv_sec;
+	(void) sysclk_gettime_internal(&curr_time);
+	rtclock.calend_offset = bbc_time;
+	SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time);
+	rtclock.calend_is_set = TRUE;
+	RTC_UNLOCK(s);
+
+	host_notify_calendar_change();
 }
 
-mach_timespec_t
-clock_get_calendar_offset(void)
+void
+clock_get_boottime_nanotime(
+	uint32_t	*secs,
+	uint32_t	*nanosecs)
 {
-	mach_timespec_t	result = MACH_TIMESPEC_ZERO;
-	spl_t		s;
-
-	LOCK_RTC(s);
-	if (rtclock.calend_is_set)
-		result = rtclock.calend_offset;
-	UNLOCK_RTC(s);
-
-	return (result);
+	*secs = rtclock.boottime;
+	*nanosecs = 0;
 }
 
 void
 clock_timebase_info(
	mach_timebase_info_t	info)
 {
-	spl_t	s;
-
-	LOCK_RTC(s);
-	info->numer = info->denom = 1;
-	UNLOCK_RTC(s);
+	info->numer = info->denom = 1;
 }
 
 void
 clock_set_timer_deadline(
	uint64_t		deadline)
 {
-	spl_t		s;
-
-	LOCK_RTC(s);
-	rtclock.timer_deadline = deadline;
-	rtclock.timer_is_set = TRUE;
-	UNLOCK_RTC(s);
+	spl_t			s;
+	cpu_data_t		*pp = current_cpu_datap();
+	rtclock_timer_t		*mytimer = &pp->cpu_rtc_timer;
+	uint64_t		abstime;
+	uint64_t		decr;
+
+	assert(get_preemption_level() > 0);
+	assert(rtclock_timer_expire);
+
+	RTC_INTRS_OFF(s);
+	mytimer->deadline = deadline;
+	mytimer->is_set = TRUE;
+	if (!mytimer->has_expired) {
+		abstime = mach_absolute_time();
+		if (mytimer->deadline < pp->cpu_rtc_tick_deadline) {
+			decr = deadline_to_decrementer(mytimer->deadline,
+						       abstime);
+			rtc_lapic_set_timer(decr);
+			pp->cpu_rtc_intr_deadline = mytimer->deadline;
+			KERNEL_DEBUG_CONSTANT(
+				MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) |
+				DBG_FUNC_NONE, decr, 2, 0, 0, 0);
+		}
+	}
+	RTC_INTRS_ON(s);
 }
 
 void
 clock_set_timer_func(
	clock_timer_func_t	func)
 {
-	spl_t	s;
-
-	LOCK_RTC(s);
-	if (rtclock.timer_expire == NULL)
-		rtclock.timer_expire = func;
-	UNLOCK_RTC(s);
+	if (rtclock_timer_expire == NULL)
+		rtclock_timer_expire = func;
 }
 
-
-
 /*
- * Load the count register and start the clock.
+ * Real-time clock device interrupt.
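+ * In outline (see the handler body below): read nanotime; if the
+ * periodic tick deadline has passed, update nanotime and run
+ * hertz_tick(); if the one-shot timer deadline has passed, call the
+ * registered expiration function; then re-arm the lapic with the
+ * nearer of the two deadlines.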
*/ -#define RTCLOCK_RESET() { \ - outb(PITCTL_PORT, PIT_C0|PIT_NDIVMODE|PIT_READMODE); \ - outb(PITCTR0_PORT, (clks_per_int & 0xff)); \ - outb(PITCTR0_PORT, (clks_per_int >> 8)); \ -} - -/* - * Reset the clock device. This causes the realtime clock - * device to reload its mode and count value (frequency). - * Note: the CPU should be calibrated - * before starting the clock for the first time. - */ - void -rtclock_reset(void) +rtclock_intr(struct i386_interrupt_state *regs) { - int s; + uint64_t abstime; + uint32_t latency; + uint64_t decr; + uint64_t decr_tick; + uint64_t decr_timer; + cpu_data_t *pp = current_cpu_datap(); + rtclock_timer_t *mytimer = &pp->cpu_rtc_timer; + + assert(get_preemption_level() > 0); + assert(!ml_get_interrupts_enabled()); + + abstime = _rtc_nanotime_read(); + latency = (uint32_t) abstime - pp->cpu_rtc_intr_deadline; + if (pp->cpu_rtc_tick_deadline <= abstime) { + rtc_nanotime_update(); + clock_deadline_for_periodic_event( + NSEC_PER_HZ, abstime, &pp->cpu_rtc_tick_deadline); + hertz_tick( +#if STAT_TIME + NSEC_PER_HZ, +#endif + (regs->efl & EFL_VM) || ((regs->cs & 0x03) != 0), + regs->eip); + } -#if NCPUS > 1 && !(MP_V1_1 && 0) - mp_disable_preemption(); - if (cpu_number() != master_cpu) { - mp_enable_preemption(); - return; + abstime = _rtc_nanotime_read(); + if (mytimer->is_set && mytimer->deadline <= abstime) { + mytimer->has_expired = TRUE; + mytimer->is_set = FALSE; + (*rtclock_timer_expire)(abstime); + assert(!ml_get_interrupts_enabled()); + mytimer->has_expired = FALSE; } - mp_enable_preemption(); -#endif /* NCPUS > 1 && AT386 && !MP_V1_1 */ - LOCK_RTC(s); - RTCLOCK_RESET(); - UNLOCK_RTC(s); -} -/* - * Real-time clock device interrupt. Called only on the - * master processor. Updates the clock time and upcalls - * into the higher level clock code to deliver alarms. - */ -int -rtclock_intr(void) -{ - uint64_t abstime; - mach_timespec_t clock_time; - int i; - spl_t s; + /* Log the interrupt service latency (-ve value expected by tool) */ + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) | DBG_FUNC_NONE, + -latency, (uint32_t)regs->eip, 0, 0, 0); - /* - * Update clock time. Do the update so that the macro - * MTS_TO_TS() for reading the mapped time works (e.g. - * update in order: mtv_csec, mtv_time.tv_nsec, mtv_time.tv_sec). - */ - LOCK_RTC(s); - rdtsc_hilo(&rtc_last_int_tsc_hi, &rtc_last_int_tsc_lo); - i = rtclock.time.tv_nsec + rtclock.intr_nsec; - if (i < NSEC_PER_SEC) - rtclock.time.tv_nsec = i; - else { - rtclock.time.tv_nsec = i - NSEC_PER_SEC; - rtclock.time.tv_sec++; - } - /* note time now up to date */ - last_ival = 0; + abstime = _rtc_nanotime_read(); + decr_tick = deadline_to_decrementer(pp->cpu_rtc_tick_deadline, abstime); + decr_timer = (mytimer->is_set) ? + deadline_to_decrementer(mytimer->deadline, abstime) : + DECREMENTER_MAX; + decr = MIN(decr_tick, decr_timer); + pp->cpu_rtc_intr_deadline = abstime + decr; - rtclock.abstime += rtclock.intr_nsec; - abstime = rtclock.abstime; - if ( rtclock.timer_is_set && - rtclock.timer_deadline <= abstime ) { - rtclock.timer_is_set = FALSE; - UNLOCK_RTC(s); + rtc_lapic_set_timer(decr); - (*rtclock.timer_expire)(abstime); + /* Log the new decrementer value */ + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE, + decr, 3, 0, 0, 0); - LOCK_RTC(s); - } +} - /* - * Perform alarm clock processing if needed. The time - * passed up is incremented by a half-interrupt tick - * to trigger alarms closest to their desired times. 
- * The clock_alarm_intr() routine calls sysclk_setalrm() - * before returning if later alarms are pending. - */ +static void +rtclock_alarm_expire( + __unused timer_call_param_t p0, + __unused timer_call_param_t p1) +{ + mach_timespec_t clock_time; - if (RtcAlrm && (RtcAlrm->tv_sec < RtcTime->tv_sec || - (RtcAlrm->tv_sec == RtcTime->tv_sec && - RtcDelt >= RtcAlrm->tv_nsec - RtcTime->tv_nsec))) { - clock_time.tv_sec = 0; - clock_time.tv_nsec = RtcDelt; - ADD_MACH_TIMESPEC (&clock_time, RtcTime); - RtcAlrm = 0; - UNLOCK_RTC(s); - /* - * Call clock_alarm_intr() without RTC-lock. - * The lock ordering is always CLOCK-lock - * before RTC-lock. - */ - clock_alarm_intr(SYSTEM_CLOCK, &clock_time); - LOCK_RTC(s); - } + (void) sysclk_gettime_internal(&clock_time); - /* - * On a HZ-tick boundary: return 0 and adjust the clock - * alarm resolution (if requested). Otherwise return a - * non-zero value. - */ - if ((i = --rtc_intr_count) == 0) { - if (rtclock.new_ires) { - rtc_setvals(new_clknum, rtclock.new_ires); - RTCLOCK_RESET(); /* lock clock register */ - rtclock.new_ires = 0; - } - rtc_intr_count = rtc_intr_hertz; - } - UNLOCK_RTC(s); - return (i); + clock_alarm_intr(SYSTEM_CLOCK, &clock_time); } void clock_get_uptime( uint64_t *result) { - uint32_t ticks; - spl_t s; + *result = rtc_nanotime_read(); +} - LOCK_RTC(s); - ticks = get_uptime_cycles(); - *result = rtclock.abstime; - UNLOCK_RTC(s); +uint64_t +mach_absolute_time(void) +{ + return rtc_nanotime_read(); +} - *result += ticks; +void +absolutetime_to_microtime( + uint64_t abstime, + uint32_t *secs, + uint32_t *microsecs) +{ + uint32_t remain; + + asm volatile( + "divl %3" + : "=a" (*secs), "=d" (remain) + : "A" (abstime), "r" (NSEC_PER_SEC)); + asm volatile( + "divl %3" + : "=a" (*microsecs) + : "0" (remain), "d" (0), "r" (NSEC_PER_USEC)); } void @@ -1042,87 +1488,14 @@ nanoseconds_to_absolutetime( *result = nanoseconds; } -/* - * measure_delay(microseconds) - * - * Measure elapsed time for delay calls - * Returns microseconds. - * - * Microseconds must not be too large since the counter (short) - * will roll over. Max is about 13 ms. Values smaller than 1 ms are ok. - * This uses the assumed frequency of the rt clock which is emperically - * accurate to only about 200 ppm. - */ - -int -measure_delay( - int us) -{ - unsigned int lsb, val; - - outb(PITCTL_PORT, PIT_C0|PIT_NDIVMODE|PIT_READMODE); - outb(PITCTR0_PORT, 0xff); /* set counter to max value */ - outb(PITCTR0_PORT, 0xff); - delay(us); - outb(PITCTL_PORT, PIT_C0); - lsb = inb(PITCTR0_PORT); - val = (inb(PITCTR0_PORT) << 8) | lsb; - val = 0xffff - val; - val *= 1000000; - val /= CLKNUM; - return(val); -} - -/* - * calibrate_delay(void) - * - * Adjust delaycount. Called from startup before clock is started - * for normal interrupt generation. 
- */ - void -calibrate_delay(void) +machine_delay_until( + uint64_t deadline) { - unsigned val; - int prev = 0; - register int i; - - printf("adjusting delay count: %d", delaycount); - for (i=0; i<10; i++) { - prev = delaycount; - /* - * microdata must not be too large since measure_timer - * will not return accurate values if the counter (short) - * rolls over - */ - val = measure_delay(microdata); - if (val == 0) { - delaycount *= 2; - } else { - delaycount *= microdata; - delaycount += val-1; /* round up to upper us */ - delaycount /= val; - } - if (delaycount <= 0) - delaycount = 1; - if (delaycount != prev) - printf(" %d", delaycount); - } - printf("\n"); -} + uint64_t now; -#if MACH_KDB -void -test_delay(void); - -void -test_delay(void) -{ - register i; - - for (i = 0; i < 10; i++) - printf("%d, %d\n", i, measure_delay(i)); - for (i = 10; i <= 100; i+=10) - printf("%d, %d\n", i, measure_delay(i)); + do { + cpu_pause(); + now = mach_absolute_time(); + } while (now < deadline); } -#endif /* MACH_KDB */