osfmk/i386/machine_routines_asm.s

   1 /*
   2  * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #include <i386/asm.h>
  30 #include <i386/proc_reg.h>
  31 #include <i386/eflags.h>
  32
  33 #include <i386/postcode.h>
  34 #include <i386/apic.h>
  35 #include <assym.s>
  36
  37 /*
  38 **      ml_get_timebase()
  39 **
  40 **      Entry   - first stack argument (S_ARG0) is a pointer to a 64 bit structure.
  41 **
  42 **      Exit    - 64 bit structure filled in from the TSC (high word at offset 0, low word at offset 4).
  43 **
  44 */
  45 ENTRY(ml_get_timebase)
  46
  47                         movl    S_ARG0, %ecx                    /* %ecx = destination pointer passed by the caller */
  48
  49                         rdtsc                                   /* %edx:%eax = current time-stamp counter */
  50
  51                         movl    %eax, 4(%ecx)                   /* low 32 bits of the TSC go to offset 4 */
  52                         movl    %edx, (%ecx)                    /* high 32 bits go to offset 0; NOTE(review): high word first - confirm callers expect this layout */
  53
  54                         ret
  55
  56 /*
  57  *      Convert between various timer units
  58  *
  59  *              uint64_t tmrCvt(uint64_t time, uint64_t conversion)
  60  *
  61  *              This code converts 64-bit time units to other units.
  62  *              For example, the TSC is converted to HPET units.
  63  *
  64  *              Time is a 64-bit integer that is some number of ticks.
  65  *              Conversion is 64-bit fixed point number which is composed
  66  *              of a 32 bit integer and a 32 bit fraction.
  67  *
  68  *              The time ticks are multiplied by the conversion factor.  The
  69  *              calculations are done as a 128-bit value but both the high
  70  *              and low words are dropped.  The high word is overflow and the
  71  *              low word is the fraction part of the result.
  72  *
  73  *              We return a 64-bit value.
  74  *
  75  *              Note that we can use this function to multiply 2 conversion factors.
  76  *              We do this in order to calculate the multiplier used to convert
  77  *              directly between any two units.
  78  *
  79  */
  80
  81                         .globl  EXT(tmrCvt)
  82                         .align FALIGN
  83
  84 LEXT(tmrCvt)
  85 //                      Returns in %edx:%eax bits 32..95 of the 128-bit product ts * cvt (both passed by value on the stack)
  86                         pushl   %ebp                                    // Save caller's frame pointer
  87                         movl    %esp,%ebp                               // Set up frame; the first argument is at 8(%ebp)
  88                         pushl   %ebx                                    // Preserve %ebx for the caller (restored before ret)
  89                         pushl   %esi                                    // Preserve %esi for the caller (restored before ret)
  90                         pushl   %edi                                    // Preserve %edi for the caller (restored before ret)
  91
  92 //                      %ebp + 8        - low-order ts
  93 //                      %ebp + 12       - high-order ts
  94 //                      %ebp + 16       - low-order cvt
  95 //                      %ebp + 20       - high-order cvt
  96
  97                         movl    8(%ebp),%eax                    // Get low-order ts
  98                         mull    16(%ebp)                                // %edx:%eax = ts.lo * cvt.lo
  99                         movl    %edx,%edi                               // Keep only the high half; the low half is the discarded fraction
 100
 101                         movl    12(%ebp),%eax                   // Get the high-order ts
 102                         mull    16(%ebp)                                // %edx:%eax = ts.hi * cvt.lo
 103                         addl    %eax,%edi                               // Add its low half to the high half of the low x low product
 104                         adcl    $0,%edx                                 // Propagate the carry into the high half
 105                         movl    %edx,%esi                               // Save the high half
 106
 107 //                      We now have the upper 64 bits of the 96 bit multiply of ts and the low half of cvt
 108 //                      in %esi:%edi
 109
 110                         movl    8(%ebp),%eax                    // Get low-order ts
 111                         mull    20(%ebp)                                // %edx:%eax = ts.lo * cvt.hi
 112                         movl    %eax,%ebx                               // Save its low half
 113                         movl    %edx,%ecx                               // Save its high half
 114
 115                         movl    12(%ebp),%eax                   // Get the high-order ts
 116                         mull    20(%ebp)                                // %edx:%eax = ts.hi * cvt.hi
 117
 118 //                      Now have %ecx:%ebx = ts.lo * cvt.hi and %edx:%eax = ts.hi * cvt.hi
 119 //                      %edx (the highest word of the product) is overflow and is not used
 120
 121                         addl    %edi,%ebx                               // Result low word: sum of the two bit-32..63 contributions
 122                         adcl    %ecx,%esi                               // Result high word: add high half of ts.lo * cvt.hi plus carry from the low word
 123                         addl    %eax,%esi                               // Add low half of ts.hi * cvt.hi; any carry out is overflow and is dropped
 124
 125                         movl    %ebx,%eax                               // Pass back low word
 126                         movl    %esi,%edx                               // and the high word
 127
 128                         popl    %edi                                    // Restore caller's %edi
 129                         popl    %esi                                    // Restore caller's %esi
 130                         popl    %ebx                                    // Restore caller's %ebx
 131                         popl    %ebp                                    // Restore caller's frame pointer
 132
 133                         ret                                             // Leave...
 134
 135                         .globl  EXT(_rtc_nanotime_store)
 136                         .align  FALIGN
 137
 138 LEXT(_rtc_nanotime_store)
 139                 pushl           %ebp
 140                 movl            %esp,%ebp                               /* stack arguments start at 8(%ebp) */
 141                 pushl           %esi                                    /* %esi is used for the generation; restored on exit */
 142
 143                 movl            32(%ebp),%edx                           /* %edx = destination rtc_nanotime_info */
 144
 145                 movl            RNT_GENERATION(%edx),%esi               /* remember the generation being replaced */
 146                 movl            $0,RNT_GENERATION(%edx)                 /* generation 0 marks the data as mid-update */
 147
 148                 movl            24(%ebp),%eax                           /* scale argument */
 149                 movl            %eax,RNT_SCALE(%edx)
 150
 151                 movl            28(%ebp),%eax                           /* shift argument */
 152                 movl            %eax,RNT_SHIFT(%edx)
 153
 154                 movl            8(%ebp),%eax                            /* 64-bit TSC base, low word then high word */
 155                 movl            %eax,RNT_TSC_BASE(%edx)
 156                 movl            12(%ebp),%eax
 157                 movl            %eax,RNT_TSC_BASE+4(%edx)
 158
 159                 movl            16(%ebp),%eax                           /* 64-bit nanosecond base, low word then high word */
 160                 movl            %eax,RNT_NS_BASE(%edx)
 161                 movl            20(%ebp),%eax
 162                 movl            %eax,RNT_NS_BASE+4(%edx)
 163
 164                 incl            %esi                                    /* advance to the next generation */
 165                 jnz             1f                                      /* nonzero: usable as is */
 166                 incl            %esi                                    /* wrapped to 0, which is reserved as the update flag: use 1 */
 167 1:              movl            %esi,RNT_GENERATION(%edx)               /* publish last: nonzero generation makes the data usable */
 168
 169                 popl            %esi
 170                 popl            %ebp
 171                 ret
 172
 173
 174 /* uint64_t _rtc_nanotime_read( rtc_nanotime_t *rntp, int slow );
 175  *
 176  * This is the same as the commpage nanotime routine, except that it uses the
 177  * kernel internal "rtc_nanotime_info" data instead of the commpage data.  The two copies
 178  * of data (one in the kernel and one in user space) are kept in sync by rtc_clock_napped().
 179  *
 180  * Warning!  There is another copy of this code in osfmk/i386/locore.s.  The
 181  * two versions must be kept in sync with each other!
 182  *
 183  * There are actually two versions of the algorithm, one each for "slow" and "fast"
 184  * processors.  The more common "fast" algorithm is:
 185  *
 186  *      nanoseconds = (((rdtsc - rnt_tsc_base) * rnt_tsc_scale) / 2**32) + rnt_ns_base;
 187  *
 188  * Of course, the divide by 2**32 is a nop.  rnt_tsc_scale is a constant computed during initialization:
 189  *
 190  *      rnt_tsc_scale = (10**9 * 2**32) / tscFreq;
 191  *
 192  * The "slow" algorithm uses long division:
 193  *
 194  *      nanoseconds = (((rdtsc - rnt_tsc_base) * 10**9) / tscFreq) + rnt_ns_base;
 195  *
 196  * Since this routine is not synchronized and can be called in any context,
 197  * we use a generation count to guard against seeing partially updated data.  In addition,
 198  * the _rtc_nanotime_store() routine -- just above -- zeroes the generation before
 199  * updating the data, and stores the nonzero generation only after all other data has been
 200  * stored.  Because IA32 guarantees that stores by one processor must be seen in order
 201  * by another, we can avoid using a lock.  We spin while the generation is zero.
 202  *
 203  * In accordance with the ABI, we return the 64-bit nanotime in %edx:%eax.
 204  */
 205
 206                 .globl  EXT(_rtc_nanotime_read)
 207                 .align  FALIGN
 208 LEXT(_rtc_nanotime_read)
 209                 pushl           %ebp
 210                 movl            %esp,%ebp                               /* arguments: 8(%ebp) = info pointer, 12(%ebp) = "slow" flag */
 211                 pushl           %esi                                    /* %esi, %edi and %ebx are used below and restored before ret */
 212                 pushl           %edi
 213                 pushl           %ebx
 214                 movl            8(%ebp),%edi                            /* get ptr to rtc_nanotime_info */
 215                 movl            12(%ebp),%eax                           /* get "slow" flag */
 216                 testl           %eax,%eax
 217                 jnz             Lslow                                   /* nonzero flag selects the long-division path */
 218
 219                 /* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD */
 220 0:
 221                 movl            RNT_GENERATION(%edi),%esi               /* get generation (0 if being changed) */
 222                 testl           %esi,%esi                               /* if being changed, loop until stable */
 223                 jz              0b
 224
 225                 rdtsc                                                   /* get TSC in %edx:%eax */
 226                 subl            RNT_TSC_BASE(%edi),%eax                 /* %edx:%eax = TSC - tsc_base (64-bit subtract) */
 227                 sbbl            RNT_TSC_BASE+4(%edi),%edx               /* borrow propagates into the high word */
 228
 229                 movl            RNT_SCALE(%edi),%ecx                    /* %ecx = 32-bit scale factor */
 230
 231                 movl            %edx,%ebx                               /* stash high word of the delta */
 232                 mull            %ecx                                    /* %edx:%eax = delta.lo * scale */
 233                 movl            %ebx,%eax                               /* %eax = high word of the delta */
 234                 movl            %edx,%ebx                               /* keep high half of the first product; its low half is dropped */
 235                 mull            %ecx                                    /* %edx:%eax = delta.hi * scale */
 236                 addl            %ebx,%eax                               /* add in the high half of the first product */
 237                 adcl            $0,%edx                                 /* %edx:%eax = (delta * scale) >> 32 */
 238
 239                 addl            RNT_NS_BASE(%edi),%eax                  /* add the 64-bit ns_base */
 240                 adcl            RNT_NS_BASE+4(%edi),%edx
 241
 242                 cmpl            RNT_GENERATION(%edi),%esi               /* have the parameters changed? */
 243                 jne             0b                                      /* yes, loop until stable */
 244
 245                 popl            %ebx
 246                 popl            %edi
 247                 popl            %esi
 248                 popl            %ebp
 249                 ret                                                     /* result in edx:eax */
 250
 251                 /* Processor whose TSC frequency is slower than or equal to SLOW_TSC_THRESHOLD */
 252 Lslow:
 253                 movl            RNT_GENERATION(%edi),%esi               /* get generation (0 if being changed) */
 254                 testl           %esi,%esi                               /* if being changed, loop until stable */
 255                 jz              Lslow
 256                 pushl           %esi                                    /* save generation */
 257                 pushl           RNT_SHIFT(%edi)                         /* save low 32 bits of tscFreq */
 258
 259                 rdtsc                                                   /* get TSC in %edx:%eax */
 260                 subl            RNT_TSC_BASE(%edi),%eax                 /* %edx:%eax = TSC - tsc_base (64-bit subtract) */
 261                 sbbl            RNT_TSC_BASE+4(%edi),%edx
 262
 263                 /*
 264                 * Do the math to convert tsc ticks to nanoseconds.  We first
 265                 * do long multiply of 1 billion times the tsc.  Then we do
 266                 * long division by the tsc frequency
 267                 */
 268                 mov             $1000000000, %ecx                       /* number of nanoseconds in a second */
 269                 mov             %edx, %ebx                              /* stash high word of the delta */
 270                 mul             %ecx                                    /* %edx:%eax = delta.lo * 1000000000 */
 271                 mov             %edx, %edi                              /* high half of first product (overwrites the info pointer; reloaded below) */
 272                 mov             %eax, %esi                              /* low word of the 96-bit product (overwrites the generation; popped below) */
 273                 mov             %ebx, %eax                              /* %eax = high word of the delta */
 274                 mul             %ecx                                    /* %edx:%eax = delta.hi * 1000000000 */
 275                 add             %edi, %eax                              /* add in the high half of the first product */
 276                 adc             $0, %edx                                /* result in edx:eax:esi */
 277                 mov             %eax, %edi                              /* %edi = middle word of the 96-bit product */
 278                 popl            %ecx                                    /* get low 32 tscFreq */
 279                 xor             %eax, %eax
 280                 xchg            %edx, %eax                              /* %eax = top word, %edx = 0: dividend for the first step */
 281                 div             %ecx                                    /* quotient is not used (bits above 64); remainder stays in %edx */
 282                 xor             %eax, %eax                              /* has no effect on the result: %eax is overwritten by the next instruction */
 283                 mov             %edi, %eax                              /* dividend = remainder:middle word */
 284                 div             %ecx                                    /* %eax = high word of the quotient, remainder in %edx */
 285                 mov             %eax, %ebx                              /* hold the high word of the quotient */
 286                 mov             %esi, %eax                              /* dividend = remainder:low word */
 287                 div             %ecx                                    /* %eax = low word of the quotient */
 288                 mov             %ebx, %edx                              /* result in edx:eax */
 289
 290                 movl            8(%ebp),%edi                            /* recover ptr to rtc_nanotime_info */
 291                 popl            %esi                                    /* recover generation */
 292
 293                 addl            RNT_NS_BASE(%edi),%eax                  /* add the 64-bit ns_base */
 294                 adcl            RNT_NS_BASE+4(%edi),%edx
 295
 296                 cmpl            RNT_GENERATION(%edi),%esi               /* have the parameters changed? */
 297                 jne             Lslow                                   /* yes, loop until stable */
 298
 299                 pop             %ebx
 300                 pop             %edi
 301                 pop             %esi
 302                 pop             %ebp
 303                 ret                                                     /* result in edx:eax */
 304