+/* uint64_t _rtc_nanotime_read( rtc_nanotime_t *rntp, int slow );
+ *
+ * This is the same as the commpage nanotime routine, except that it uses the
+ * kernel internal "rtc_nanotime_info" data instead of the commpage data. The two copies
+ * of data (one in the kernel and one in user space) are kept in sync by rtc_clock_napped().
+ *
+ * Warning! There is another copy of this code in osfmk/i386/locore.s. The
+ * two versions must be kept in sync with each other!
+ *
+ * There are actually two versions of the algorithm, one each for "slow" and "fast"
+ * processors. The more common "fast" algorithm is:
+ *
+ *	nanoseconds = (((rdtsc - rnt_tsc_base) * rnt_tsc_scale) / 2**32) + rnt_ns_base;
+ *
+ * Of course, the divide by 2**32 is a nop. rnt_tsc_scale is a constant computed during initialization:
+ *
+ *	rnt_tsc_scale = (10**9 * 2**32) / tscFreq;
+ *
+ * The "slow" algorithm uses long division:
+ *
+ *	nanoseconds = (((rdtsc - rnt_tsc_base) * 10**9) / tscFreq) + rnt_ns_base;
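+ *
+ * As an illustrative C sketch only (the field and variable names follow the
+ * description above, rdtsc64() stands in for reading the TSC, and the 128-bit
+ * temporary is purely for exposition -- the 32-bit code below builds the same
+ * wide product and quotient out of 32-bit multiply and divide steps):
+ *
+ *	static uint64_t
+ *	nanotime_sketch(const rtc_nanotime_t *rnt, int slow, uint64_t tscFreq)
+ *	{
+ *		uint64_t delta = rdtsc64() - rnt->rnt_tsc_base;
+ *
+ *		if (!slow)	// fast: scale, then drop the low 32 bits
+ *			return (uint64_t)(((unsigned __int128)delta *
+ *			    rnt->rnt_tsc_scale) >> 32) + rnt->rnt_ns_base;
+ *				// slow: long division by the TSC frequency
+ *		return (uint64_t)(((unsigned __int128)delta *
+ *		    1000000000ULL) / tscFreq) + rnt->rnt_ns_base;
+ *	}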
+ *
+ * Since this routine is not synchronized and can be called in any context,
+ * we use a generation count to guard against seeing partially updated data. In addition,
+ * the _rtc_nanotime_store() routine -- just above -- zeroes the generation before
+ * updating the data, and stores the nonzero generation only after all other data has been
+ * stored. Because IA32 guarantees that stores by one processor must be seen in order
+ * by another, we can avoid using a lock. We spin while the generation is zero.
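+ *
+ * A sketch of that generation protocol in C-like form (illustrative only; in C
+ * you would also need volatile accesses or compiler barriers, whereas the
+ * hand-written assembly relies directly on IA32 store ordering, and cpu_pause()
+ * here just stands for a PAUSE hint). A full reader also re-reads the
+ * generation afterwards and retries if it changed:
+ *
+ *	// writer (e.g. _rtc_nanotime_store):
+ *	rnt->rnt_generation = 0;		// mark update in progress
+ *	rnt->rnt_tsc_base   = new_tsc_base;
+ *	rnt->rnt_ns_base    = new_ns_base;
+ *	rnt->rnt_tsc_scale  = new_scale;
+ *	rnt->rnt_generation = new_gen;		// any nonzero value, stored last
+ *
+ *	// reader:
+ *	uint32_t gen;
+ *	do {
+ *		while ((gen = rnt->rnt_generation) == 0)
+ *			cpu_pause();		// update in flight -- spin
+ *		ns = nanotime_sketch(rnt, slow, tscFreq);
+ *	} while (gen != rnt->rnt_generation);	// changed underneath us -- retry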
+ *
+ * In accordance with the ABI, we return the 64-bit nanotime in %edx:%eax.
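+ *
+ * So a hypothetical C caller simply sees an ordinary 64-bit return value
+ * (the "slow_tsc" flag here is whatever the caller derived from
+ * SLOW_TSC_THRESHOLD):
+ *
+ *	extern uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp, int slow);
+ *	uint64_t now = _rtc_nanotime_read(&rtc_nanotime_info, slow_tsc);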
+ */
+
+ .globl EXT(_rtc_nanotime_read)
+ .align FALIGN
+LEXT(_rtc_nanotime_read)
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %edi
+ pushl %ebx
+ movl 8(%ebp),%edi /* get ptr to rtc_nanotime_info */
+ movl 12(%ebp),%eax /* get "slow" flag */
+ testl %eax,%eax
+ jnz Lslow
+
+	/* Fast path: the TSC frequency is above SLOW_TSC_THRESHOLD */
+ PAL_RTC_NANOTIME_READ_FAST()
+
+ popl %ebx
+ popl %edi
+ popl %esi
+ popl %ebp