1 /*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /*
24 * @OSF_COPYRIGHT@
25 */
26 #include <debug.h>
27 #include <ppc/asm.h>
28 #include <ppc/proc_reg.h>
29 #include <mach/ppc/vm_param.h>
30 #include <assym.s>
31 #include <sys/errno.h>
32
33 #define INSTRUMENT 0
34
35 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
36 /*
37 * void pmap_zero_page(vm_offset_t pa)
38 *
39 * Zero a page of physical memory. This routine runs in 32 or 64-bit mode,
40 * and handles 32 and 128-byte cache lines.
41 */
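// As a rough C sketch of what the loop below achieves (illustrative only; the real code
// uses dcbz/dcbz128 to zero whole cache lines without first reading them, which plain
// memset cannot express, and it walks the offsets downward, two lines per iteration):
//
//      #include <string.h>
//      static void zero_page_sketch(char *page, unsigned linesize) {  // linesize: 32 or 128
//          for (unsigned off = 0; off < PPC_PGBYTES; off += linesize)
//              memset(page + off, 0, linesize);                        // one dcbz per line
//      }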
42
43
44 .align 5
45 .globl EXT(pmap_zero_page)
46
47 LEXT(pmap_zero_page)
48
49 mflr r12 // save return address
50 bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
51 mtlr r12 // restore return address
52 andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size
53
54 subfic r4,r9,PPC_PGBYTES // r4 <- starting offset in page
55
56 bt++ pf64Bitb,page0S4 // Go do the big guys...
57
58 slwi r3,r3,12 // get page address from page num
59 b page_zero_1 // Jump to line aligned loop...
60
61 .align 5
62
63 nop
64 nop
65 nop
66 nop
67 nop
68 nop
69 nop
70
71 page0S4:
72 sldi r3,r3,12 // get page address from page num
73
74 page_zero_1: // loop zeroing cache lines
75 sub. r5,r4,r9 // more to go?
76 dcbz128 r3,r4 // zero either 32 or 128 bytes
77 sub r4,r5,r9 // generate next offset
78 dcbz128 r3,r5
79 bne-- page_zero_1
80
81 b EXT(ml_restore) // restore MSR and do the isync
82
83
84 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
85 /* void
86 * phys_copy(src, dst, bytecount)
87 * addr64_t src;
88 * addr64_t dst;
89 * int bytecount
90 *
91 * This routine will copy bytecount bytes from physical address src to physical
92 * address dst. It runs in 64-bit mode if necessary, but does not handle
93 * overlap or make any attempt to be optimal. Length must be a signed word.
94 * Not performance critical.
95 */
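// A hedged C sketch of the copy strategy below (illustrative only, not the kernel
// interface): move 4-byte words while at least a word remains, then finish byte by
// byte. Like the assembly, it makes no attempt to handle overlap.
//
//      #include <string.h>
//      #include <stdint.h>
//      static void phys_copy_sketch(const uint8_t *src, uint8_t *dst, int n) {
//          while (n >= 4) {                    // word loop
//              memcpy(dst, src, 4);            // 4-byte load/store
//              src += 4; dst += 4; n -= 4;
//          }
//          while (n > 0) {                     // byte loop for the remainder
//              *dst++ = *src++; n--;
//          }
//      }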
96
97
98 .align 5
99 .globl EXT(phys_copy)
100
101 LEXT(phys_copy)
102
103 rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg
104 mflr r12 // get return address
105 rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits
106 rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg
107 bl EXT(ml_set_physical_disabled) // turn DR and EE off, SF on, get features in r10
108 rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits
109 mtlr r12 // restore return address
110 subic. r5,r7,4 // a word to copy?
111 b phys_copy_2
112
113 .align 5
114
115 phys_copy_1: // loop copying words
116 subic. r5,r5,4 // more to go?
117 lwz r0,0(r3)
118 addi r3,r3,4
119 stw r0,0(r4)
120 addi r4,r4,4
121 phys_copy_2:
122 bge phys_copy_1
123 addic. r5,r5,4 // restore count
124 ble phys_copy_4 // no more
125
126 // Loop is aligned here
127
128 phys_copy_3: // loop copying bytes
129 subic. r5,r5,1 // more to go?
130 lbz r0,0(r3)
131 addi r3,r3,1
132 stb r0,0(r4)
133 addi r4,r4,1
134 bgt phys_copy_3
135 phys_copy_4:
136 b EXT(ml_restore) // restore MSR and do the isync
137
138
139 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
140 /* void
141 * pmap_copy_page(src, dst)
142 * ppnum_t src;
143 * ppnum_t dst;
144 *
145 * This routine will copy the physical page src to physical page dst
146 *
147 * This routine assumes that the src and dst are page numbers and that the
148  * destination is cached. It runs on 32- and 64-bit processors, with and
149  * without altivec, and with 32- and 128-byte cache lines.
150  * We must also assume that no one will be executing within the destination
151 * page, and that this will be used for paging. Because this
152 * is a common routine, we have tuned loops for each processor class.
153 *
154 */
155 #define kSFSize (FM_SIZE+160)
156
157 ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)
158
159 lis r2,hi16(MASK(MSR_VEC)) ; Get the vector flag
160 mflr r0 // get return
161 ori r2,r2,lo16(MASK(MSR_FP)) ; Add the FP flag
162 stw r0,8(r1) // save
163 stwu r1,-kSFSize(r1) // set up a stack frame for VRs or FPRs
164 mfmsr r11 // save MSR at entry
165 mfsprg r10,2 // get feature flags
166 andc r11,r11,r2 // Clear out vec and fp
167 ori r2,r2,lo16(MASK(MSR_EE)) // Get EE on also
168 andc r2,r11,r2 // Clear out EE as well
169 mtcrf 0x02,r10 // we need to test pf64Bit
170 ori r2,r2,MASK(MSR_FP) // must enable FP for G3...
171 mtcrf 0x80,r10 // we need to test pfAltivec too
172 oris r2,r2,hi16(MASK(MSR_VEC)) // enable altivec for G4 (ignored if G3)
173 mtmsr r2 // turn EE off, FP and VEC on
174 isync
175 bt++ pf64Bitb,pmap_copy_64 // skip if 64-bit processor (only they take hint)
176 slwi r3,r3,12 // get page address from page num
177 slwi r4,r4,12 // get page address from page num
178 rlwinm r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1 // get ready to turn off DR
179 bt pfAltivecb,pmap_copy_g4 // altivec but not 64-bit means G4
180
181
182 // G3 -- copy using FPRs
183
184 stfd f0,FM_SIZE+0(r1) // save the 4 FPRs we use to copy
185 stfd f1,FM_SIZE+8(r1)
186 li r5,PPC_PGBYTES/32 // count of cache lines in a page
187 stfd f2,FM_SIZE+16(r1)
188 mtctr r5
189 stfd f3,FM_SIZE+24(r1)
190 mtmsr r12 // turn off DR after saving FPRs on stack
191 isync
192
193 pmap_g3_copy_loop: // loop over 32-byte cache lines
194 dcbz 0,r4 // avoid read of dest line
195 lfd f0,0(r3)
196 lfd f1,8(r3)
197 lfd f2,16(r3)
198 lfd f3,24(r3)
199 addi r3,r3,32
200 stfd f0,0(r4)
201 stfd f1,8(r4)
202 stfd f2,16(r4)
203 stfd f3,24(r4)
204 dcbst 0,r4 // flush dest line to RAM
205 addi r4,r4,32
206 bdnz pmap_g3_copy_loop
207
208 sync // wait for stores to take
209 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
210 li r6,PPC_PGBYTES-32 // point to last line in page
211 pmap_g3_icache_flush:
212 subic. r5,r6,32 // more to go?
213 icbi r4,r6 // flush another line in icache
214 subi r6,r5,32 // get offset to next line
215 icbi r4,r5
216 bne pmap_g3_icache_flush
217
218 sync
219 mtmsr r2 // turn DR back on
220 isync
221 lfd f0,FM_SIZE+0(r1) // restore the FPRs
222 lfd f1,FM_SIZE+8(r1)
223 lfd f2,FM_SIZE+16(r1)
224 lfd f3,FM_SIZE+24(r1)
225
226 b pmap_g4_restore // restore MSR and done
227
228
229 // G4 -- copy using VRs
230
231 pmap_copy_g4: // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
232 la r9,FM_SIZE+16(r1) // point r9 at the place where we save VRs
233 li r5,16 // load x-form offsets into r5-r9
234 li r6,32 // another offset
235 stvx v0,0,r9 // save some VRs so we can use to copy
236 li r7,48 // another offset
237 stvx v1,r5,r9
238 li r0,PPC_PGBYTES/64 // we loop over 64-byte chunks
239 stvx v2,r6,r9
240 mtctr r0
241 li r8,96 // get look-ahead for touch
242 stvx v3,r7,r9
243 li r9,128
244 mtmsr r12 // now we've saved VRs on stack, turn off DR
245 isync // wait for it to happen
246 b pmap_g4_copy_loop
247
248 .align 5 // align inner loops
249 pmap_g4_copy_loop: // loop over 64-byte chunks
250 dcbt r3,r8 // touch 3 lines ahead
251 nop // avoid a 17-word loop...
252 dcbt r3,r9 // touch 4 lines ahead
253 nop // more padding
254 dcba 0,r4 // avoid pre-fetch of 1st dest line
255 lvx v0,0,r3 // offset 0
256 lvx v1,r5,r3 // offset 16
257 lvx v2,r6,r3 // offset 32
258 lvx v3,r7,r3 // offset 48
259 addi r3,r3,64
260 dcba r6,r4 // avoid pre-fetch of 2nd line
261 stvx v0,0,r4 // offset 0
262 stvx v1,r5,r4 // offset 16
263 stvx v2,r6,r4 // offset 32
264 stvx v3,r7,r4 // offset 48
265 dcbf 0,r4 // push line 1
266 dcbf r6,r4 // and line 2
267 addi r4,r4,64
268 bdnz pmap_g4_copy_loop
269
270 sync // wait for stores to take
271 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
272 li r8,PPC_PGBYTES-32 // point to last line in page
273 pmap_g4_icache_flush:
274 subic. r9,r8,32 // more to go?
275 icbi r4,r8 // flush from icache
276 subi r8,r9,32 // get offset to next line
277 icbi r4,r9
278 bne pmap_g4_icache_flush
279
280 sync
281 mtmsr r2 // turn DR back on
282 isync
283 la r9,FM_SIZE+16(r1) // get base of VR save area
284 lvx v0,0,r9 // restore the VRs
285 lvx v1,r5,r9
286 lvx v2,r6,r9
287 lvx v3,r7,r9
288
289 pmap_g4_restore: // r11=MSR
290 mtmsr r11 // turn EE on, VEC and FR off
291 isync // wait for it to happen
292 addi r1,r1,kSFSize // pop off our stack frame
293 lwz r0,8(r1) // restore return address
294 mtlr r0
295 blr
296
297
298 // 64-bit/128-byte processor: copy using VRs
299
300 pmap_copy_64: // r10=features, r11=old MSR
301 sldi r3,r3,12 // get page address from page num
302 sldi r4,r4,12 // get page address from page num
303 la r9,FM_SIZE+16(r1) // get base of VR save area
304 li r5,16 // load x-form offsets into r5-r9
305 li r6,32 // another offset
306 bf pfAltivecb,pmap_novmx_copy // altivec suppressed...
307 stvx v0,0,r9 // save 8 VRs so we can copy without bubbles
308 stvx v1,r5,r9
309 li r7,48 // another offset
310 li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
311 stvx v2,r6,r9
312 stvx v3,r7,r9
313 addi r9,r9,64 // advance base ptr so we can store another 4
314 mtctr r0
315 li r0,MASK(MSR_DR) // get DR bit
316 stvx v4,0,r9
317 stvx v5,r5,r9
318 andc r12,r2,r0 // turn off DR bit
319 li r0,1 // get a 1 to slam into SF
320 stvx v6,r6,r9
321 stvx v7,r7,r9
322 rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
323 li r8,-128 // offset so we can reach back one line
324 mtmsrd r12 // now we've saved VRs, turn DR off and SF on
325 isync // wait for it to happen
326 dcbt128 0,r3,1 // start a forward stream
327 b pmap_64_copy_loop
328
329 .align 5 // align inner loops
330 pmap_64_copy_loop: // loop over 128-byte chunks
331 dcbz128 0,r4 // avoid read of destination line
332 lvx v0,0,r3 // offset 0
333 lvx v1,r5,r3 // offset 16
334 lvx v2,r6,r3 // offset 32
335 lvx v3,r7,r3 // offset 48
336 addi r3,r3,64 // don't have enough GPRs so add 64 2x
337 lvx v4,0,r3 // offset 64
338 lvx v5,r5,r3 // offset 80
339 lvx v6,r6,r3 // offset 96
340 lvx v7,r7,r3 // offset 112
341 addi r3,r3,64
342 stvx v0,0,r4 // offset 0
343 stvx v1,r5,r4 // offset 16
344 stvx v2,r6,r4 // offset 32
345 stvx v3,r7,r4 // offset 48
346 addi r4,r4,64
347 stvx v4,0,r4 // offset 64
348 stvx v5,r5,r4 // offset 80
349 stvx v6,r6,r4 // offset 96
350 stvx v7,r7,r4 // offset 112
351 addi r4,r4,64
352 dcbf r8,r4 // flush the line we just wrote
353 bdnz pmap_64_copy_loop
354
355 sync // wait for stores to take
356 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
357 li r8,PPC_PGBYTES-128 // point to last line in page
358 pmap_64_icache_flush:
359 subic. r9,r8,128 // more to go?
360 icbi r4,r8 // flush from icache
361 subi r8,r9,128 // get offset to next line
362 icbi r4,r9
363 bne pmap_64_icache_flush
364
365 sync
366 mtmsrd r2 // turn DR back on, SF off
367 isync
368 la r9,FM_SIZE+16(r1) // get base address of VR save area on stack
369 lvx v0,0,r9 // restore the VRs
370 lvx v1,r5,r9
371 lvx v2,r6,r9
372 lvx v3,r7,r9
373 addi r9,r9,64
374 lvx v4,0,r9
375 lvx v5,r5,r9
376 lvx v6,r6,r9
377 lvx v7,r7,r9
378
379 b pmap_g4_restore // restore lower half of MSR and return
380
381 //
382 // Copy on 64-bit without VMX
383 //
384
385 pmap_novmx_copy:
386 li r0,PPC_PGBYTES/128 // we loop over 128-byte chunks
387 mtctr r0
388 li r0,MASK(MSR_DR) // get DR bit
389 andc r12,r2,r0 // turn off DR bit
390 li r0,1 // get a 1 to slam into SF
391 rldimi r12,r0,63,MSR_SF_BIT // set SF bit (bit 0)
392 mtmsrd r12 // now we've saved VRs, turn DR off and SF on
393 isync // wait for it to happen
394 dcbt128 0,r3,1 // start a forward stream
395
396 pmap_novmx_copy_loop: // loop over 128-byte cache lines
397 dcbz128 0,r4 // avoid read of dest line
398
399 ld r0,0(r3) // Load half a line
400 ld r12,8(r3)
401 ld r5,16(r3)
402 ld r6,24(r3)
403 ld r7,32(r3)
404 ld r8,40(r3)
405 ld r9,48(r3)
406 ld r10,56(r3)
407
408 std r0,0(r4) // Store half a line
409 std r12,8(r4)
410 std r5,16(r4)
411 std r6,24(r4)
412 std r7,32(r4)
413 std r8,40(r4)
414 std r9,48(r4)
415 std r10,56(r4)
416
417 ld r0,64(r3) // Load half a line
418 ld r12,72(r3)
419 ld r5,80(r3)
420 ld r6,88(r3)
421 ld r7,96(r3)
422 ld r8,104(r3)
423 ld r9,112(r3)
424 ld r10,120(r3)
425
426 addi r3,r3,128
427
428 std r0,64(r4) // Store half a line
429 std r12,72(r4)
430 std r5,80(r4)
431 std r6,88(r4)
432 std r7,96(r4)
433 std r8,104(r4)
434 std r9,112(r4)
435 std r10,120(r4)
436
437 dcbf 0,r4 // flush the line we just wrote
438 addi r4,r4,128
439 bdnz pmap_novmx_copy_loop
440
441 sync // wait for stores to take
442 subi r4,r4,PPC_PGBYTES // restore ptr to destination page
443 li r8,PPC_PGBYTES-128 // point to last line in page
444
445 pmap_novmx_icache_flush:
446 subic. r9,r8,128 // more to go?
447 icbi r4,r8 // flush from icache
448 subi r8,r9,128 // get offset to next line
449 icbi r4,r9
450 bne pmap_novmx_icache_flush
451
452 sync
453 mtmsrd r2 // turn DR back on, SF off
454 isync
455
456 b pmap_g4_restore // restore lower half of MSR and return
457
458
459
460 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
461
462 // Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
463 // These routines all run on both 32- and 64-bit machines, though because they are called
464 // by the BSD kernel they are always in 32-bit mode when entered. The mapped ptr returned
465 // by MapUserMemoryWindow will, however, be 64 bits on 64-bit machines. Beware of using
466 // compare instructions on this ptr. This mapped ptr is kept globally in r31, so there
467 // is no need to store or load it, which would be mode-dependent operations since it could be
468 // 32 or 64 bits.
469
470 #define kkFrameSize (FM_SIZE+32)
471
472 #define kkBufSize (FM_SIZE+0)
473 #define kkCR3 (FM_SIZE+4)
474 #define kkSource (FM_SIZE+8)
475 #define kkDest (FM_SIZE+12)
476 #define kkCountPtr (FM_SIZE+16)
477 #define kkR31Save (FM_SIZE+20)
478 #define kkThrErrJmp (FM_SIZE+24)
479
480
481 // nonvolatile CR bits we use as flags in cr3
482
483 #define kk64bit 12
484 #define kkNull 13
485 #define kkIn 14
486 #define kkString 15
487 #define kkZero 15
488
489
490 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
491 /*
492 * int
493 * copyoutstr(src, dst, maxcount, count)
494 * vm_offset_t src; // r3
495 * addr64_t dst; // r4 and r5
496 * vm_size_t maxcount; // r6
497 * vm_size_t* count; // r7
498 *
499 * Set *count to the number of bytes copied.
500 */
501
502 ENTRY(copyoutstr, TAG_NO_FRAME_USED)
503 mfcr r2,0x10 // save caller's cr3, which we use for flags
504 mr r10,r4 // move high word of 64-bit user address to r10
505 li r0,0
506 crset kkString // flag as a string op
507 mr r11,r5 // move low word of 64-bit user address to r11
508 stw r0,0(r7) // initialize #bytes moved
509 crclr kkIn // flag as copyout
510 b copyJoin
511
512
513 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
514 /*
515 * int
516 * copyinstr(src, dst, maxcount, count)
517 * addr64_t src; // r3 and r4
518 * vm_offset_t dst; // r5
519 * vm_size_t maxcount; // r6
520 * vm_size_t* count; // r7
521 *
522 * Set *count to the number of bytes copied
523 * If dst == NULL, don't copy, just count bytes.
524 * Only currently called from klcopyinstr.
525 */
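// A hedged usage sketch (hypothetical kernel caller; as noted above, copyinstr is
// currently reached only via klcopyinstr). Types follow the prototype sketched above.
//
//      char namebuf[128];
//      vm_size_t done = 0;
//      int err = copyinstr(user_string_addr,            // addr64_t user address (hypothetical)
//                          (vm_offset_t)namebuf,        // kernel destination buffer
//                          sizeof(namebuf),             // maxcount
//                          &done);                      // bytes moved, counting the final 0
//      // err is 0 on success, EFAULT on a bad user address, or ENAMETOOLONG if the
//      // buffer fills before a terminating 0 is found (see copyinstr10 below).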
526
527 ENTRY(copyinstr, TAG_NO_FRAME_USED)
528 mfcr r2,0x10 // save caller's cr3, which we use for flags
529 cmplwi r5,0 // dst==NULL?
530 mr r10,r3 // move high word of 64-bit user address to r10
531 li r0,0
532 crset kkString // flag as a string op
533 mr r11,r4 // move low word of 64-bit user address to r11
534 crmove kkNull,cr0_eq // remember if (dst==NULL)
535 stw r0,0(r7) // initialize #bytes moved
536 crset kkIn // flag as copyin (rather than copyout)
537 b copyJoin1 // skip over the "crclr kkNull"
538
539
540 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
541 /*
542 * int
543 * copyout(src, dst, count)
544 * vm_offset_t src; // r3
545 * addr64_t dst; // r4 and r5
546 * size_t count; // r6
547 */
548
549 .align 5
550 .globl EXT(copyout)
551 .globl EXT(copyoutmsg)
552
553 LEXT(copyout)
554 LEXT(copyoutmsg)
555
556 #if INSTRUMENT
557 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
558 stw r12,0x6100+(12*16)+0x0(0) ; INSTRUMENT - Save it
559 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
560 stw r12,0x6100+(12*16)+0x4(0) ; INSTRUMENT - Save it
561 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
562 stw r12,0x6100+(12*16)+0x8(0) ; INSTRUMENT - Save it
563 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
564 stw r12,0x6100+(12*16)+0xC(0) ; INSTRUMENT - Save it
565 #endif
566 mfcr r2,0x10 // save caller's cr3, which we use for flags
567 mr r10,r4 // move high word of 64-bit user address to r10
568 crclr kkString // not a string version
569 mr r11,r5 // move low word of 64-bit user address to r11
570 crclr kkIn // flag as copyout
571 b copyJoin
572
573
574 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
575 /*
576 * int
577 * copyin(src, dst, count)
578 * addr64_t src; // r3 and r4
579 * vm_offset_t dst; // r5
580 * size_t count; // r6
581 */
582
583
584 .align 5
585 .globl EXT(copyin)
586 .globl EXT(copyinmsg)
587
588 LEXT(copyin)
589 LEXT(copyinmsg)
590
591 mfcr r2,0x10 // save caller's cr3, which we use for flags
592 mr r10,r3 // move high word of 64-bit user address to r10
593 crclr kkString // not a string version
594 mr r11,r4 // move low word of 64-bit user address to r11
595 crset kkIn // flag as copyin
596
597
598 // Common code to handle setup for all the copy variants:
599 // r2 = caller's cr3
600 // r3 = source if copyout
601 // r5 = dest if copyin
602 // r6 = buffer length or count
603 // r7 = count output ptr (if kkString set)
604 // r10 = high word of 64-bit user-space address (source if copyin, dest if copyout)
605 // r11 = low word of 64-bit user-space address
606 // cr3 = kkIn, kkString, kkNull flags
607
608 copyJoin:
609 crclr kkNull // (dst==NULL) convention not used with this call
610 copyJoin1: // enter from copyinstr with kkNull set
611 mflr r0 // get return address
612 cmplwi r6,0 // buffer length 0?
613 lis r9,0x1000 // r9 <- 0x10000000 (256MB)
614 stw r0,FM_LR_SAVE(r1) // save return
615 cmplw cr1,r6,r9 // buffer length > 256MB ?
616 mfsprg r8,2 // get the features
617 beq-- copyinout_0 // 0 length is degenerate case
618 stwu r1,-kkFrameSize(r1) // set up stack frame
619 stw r2,kkCR3(r1) // save caller's cr3, which we use for flags
620 mtcrf 0x02,r8 // move pf64Bit to cr6
621 stw r3,kkSource(r1) // save args across MapUserMemoryWindow
622 stw r5,kkDest(r1)
623 stw r6,kkBufSize(r1)
624 crmove kk64bit,pf64Bitb // remember if this is a 64-bit processor
625 stw r7,kkCountPtr(r1)
626 stw r31,kkR31Save(r1) // we use r31 globally for mapped user ptr
627 li r31,0 // no mapped ptr yet
628
629
630 // Handle buffer length > 256MB. This is an error (ENAMETOOLONG) on copyin and copyout.
631 // The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
632 // the buffer length to 256MB. This isn't an issue for strings shorter than 256MB
633 // (as most are!), but for longer ones we eventually return ENAMETOOLONG. This restriction
634 // is due to MapUserMemoryWindow; we don't want to consume more than two segments for
635 // the mapping.
636
637 ble++ cr1,copyin0 // skip if buffer length <= 256MB
638 bf kkString,copyinout_too_big // error if not string op
639 mr r6,r9 // silently clamp buffer length to 256MB
640 stw r9,kkBufSize(r1) // update saved copy too
641
642
643 // Set up thread_recover in case we hit an illegal address.
644
645 copyin0:
646 mfsprg r8,1 // Get the current thread
647 lis r2,hi16(copyinout_error)
648 ori r2,r2,lo16(copyinout_error)
649 lwz r4,THREAD_RECOVER(r8)
650 lwz r3,ACT_VMMAP(r8) // r3 <- vm_map virtual address
651 stw r2,THREAD_RECOVER(r8)
652 stw r4,kkThrErrJmp(r1)
653
654
655 // Map user segment into kernel map, turn on 64-bit mode. At this point:
656 // r3 = vm map
657 // r6 = buffer length
658 // r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout)
659 //
660 // When we call MapUserMemoryWindow, we pass:
661 // r3 = vm map ptr
662 // r4/r5 = 64-bit user space address as an addr64_t
663
664 mr r4,r10 // copy user ptr into r4/r5
665 mr r5,r11
666 #if INSTRUMENT
667 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
668 stw r12,0x6100+(13*16)+0x0(0) ; INSTRUMENT - Save it
669 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
670 stw r12,0x6100+(13*16)+0x4(0) ; INSTRUMENT - Save it
671 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
672 stw r12,0x6100+(13*16)+0x8(0) ; INSTRUMENT - Save it
673 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
674 stw r12,0x6100+(13*16)+0xC(0) ; INSTRUMENT - Save it
675 #endif
676 bl EXT(MapUserMemoryWindow) // get r3/r4 <- 64-bit address in kernel map of user operand
677 #if INSTRUMENT
678 mfspr r12,pmc1 ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
679 stw r12,0x6100+(14*16)+0x0(0) ; INSTRUMENT - Save it
680 mfspr r12,pmc2 ; INSTRUMENT - Get stamp
681 stw r12,0x6100+(14*16)+0x4(0) ; INSTRUMENT - Save it
682 mfspr r12,pmc3 ; INSTRUMENT - Get stamp
683 stw r12,0x6100+(14*16)+0x8(0) ; INSTRUMENT - Save it
684 mfspr r12,pmc4 ; INSTRUMENT - Get stamp
685 stw r12,0x6100+(14*16)+0xC(0) ; INSTRUMENT - Save it
686 #endif
687 mr r31,r4 // r31 <- mapped ptr into user space (may be 64-bit)
688 bf-- kk64bit,copyin1 // skip if a 32-bit processor
689
690 rldimi r31,r3,32,0 // slam high-order bits into mapped ptr
691 mfmsr r4 // if 64-bit, turn on SF so we can use returned ptr
692 li r0,1
693 rldimi r4,r0,63,MSR_SF_BIT // light bit 0
694 mtmsrd r4 // turn on 64-bit mode
695 isync // wait for mode to change
696
697
698 // Load r3-r5, substituting mapped ptr as appropriate.
699
700 copyin1:
701 lwz r5,kkBufSize(r1) // restore length to copy
702 bf kkIn,copyin2 // skip if copyout
703 lwz r4,kkDest(r1) // copyin: dest is kernel ptr
704 mr r3,r31 // source is mapped ptr
705 b copyin3
706 copyin2: // handle copyout
707 lwz r3,kkSource(r1) // source is kernel buffer (r3 at entry)
708 mr r4,r31 // dest is mapped ptr into user space
709
710
711 // Finally, all set up to copy:
712 // r3 = source ptr (mapped if copyin)
713 // r4 = dest ptr (mapped if copyout)
714 // r5 = length
715 // r31 = mapped ptr returned by MapUserMemoryWindow
716 // cr3 = kkIn, kkString, kk64bit, and kkNull flags
717
718 copyin3:
719 bt kkString,copyString // handle copyinstr and copyoutstr
720 bl EXT(bcopy) // copyin and copyout: let bcopy do the work
721 li r3,0 // return success
722
723
724 // Main exit point for copyin, copyout, copyinstr, and copyoutstr. Also reached
725 // from error recovery if we get a DSI accessing user space. Clear recovery ptr,
726 // and pop off frame.
727 // r3 = 0, EFAULT, or ENAMETOOLONG
728
729 copyinx:
730 lwz r2,kkCR3(r1) // get caller's cr3
731 mfsprg r6,1 // Get the current thread
732 bf-- kk64bit,copyinx1 // skip if 32-bit processor
733 mfmsr r12
734 rldicl r12,r12,0,MSR_SF_BIT+1 // if 64-bit processor, turn 64-bit mode off
735 mtmsrd r12 // turn SF off
736 isync // wait for the mode to change
737 copyinx1:
738 lwz r0,FM_LR_SAVE+kkFrameSize(r1) // get return address
739 lwz r31,kkR31Save(r1) // restore caller's r31
740 lwz r4,kkThrErrJmp(r1) // load saved thread recover
741 addi r1,r1,kkFrameSize // pop off our stack frame
742 mtlr r0
743 stw r4,THREAD_RECOVER(r6) // restore thread recover
744 mtcrf 0x10,r2 // restore cr3
745 blr
746
747
748 /* We get here via the exception handler if an illegal
749 * user memory reference was made. This error handler is used by
750 * copyin, copyout, copyinstr, and copyoutstr. Registers are as
751 * they were at point of fault, so for example cr3 flags are valid.
752 */
753
754 copyinout_error:
755 li r3,EFAULT // return error
756 b copyinx
757
758 copyinout_0: // degenerate case: 0-length copy
759 mtcrf 0x10,r2 // restore cr3
760 li r3,0 // return success
761 blr
762
763 copyinout_too_big: // degenerate case
764 mtcrf 0x10,r2 // restore cr3
765 lwz r1,0(r1) // pop off stack frame
766 li r3,ENAMETOOLONG
767 blr
768
769
770 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
771 // Handle copyinstr and copyoutstr. At this point the stack frame is set up,
772 // the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
773 // if necessary, and:
774 // r3 = source ptr, mapped if copyinstr
775 // r4 = dest ptr, mapped if copyoutstr
776 // r5 = buffer length
777 // r31 = mapped ptr returned by MapUserMemoryWindow
778 // cr3 = kkIn, kkString, kkNull, and kk64bit flags
779 // We do word copies unless the buffer is very short, then use a byte copy loop
780 // for the leftovers if necessary. The crossover at which the word loop becomes
781 // faster is about seven bytes, counting the zero.
782 //
783 // We first must word-align the source ptr, in order to avoid taking a spurious
784 // page fault.
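// A small C sketch (illustrative only; big-endian 32-bit words, as on PowerPC) of how the
// first, possibly misaligned, source word is prepared below: the word is loaded from the
// rounded-down address, and the bytes that precede the first real source byte are forced
// to 0xFF so the zero-byte test cannot see spurious zeros in them.
//
//      uint32_t byteoff = addr & 3;                     // offset of 1st byte within its word
//      uint32_t mask    = 0xFFFFFFFFu >> (byteoff * 8); // covers the payload bytes
//      uint32_t padded  = aligned_word | ~mask;         // preceding bytes become 0xFF (the orc)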
785
786 copyString:
787 cmplwi cr1,r5,15 // is buffer very short?
788 mr r12,r3 // remember ptr to 1st source byte
789 mtctr r5 // assuming short, set up loop count for bytes
790 blt-- cr1,copyinstr8 // too short for word loop
791 rlwinm r2,r3,0,0x3 // get byte offset of 1st byte within word
792 rlwinm r9,r3,3,0x18 // get bit offset of 1st byte within word
793 li r7,-1
794 sub r3,r3,r2 // word-align source address
795 add r6,r5,r2 // get length starting at byte 0 in word
796 srw r7,r7,r9 // get mask for bytes in first word
797 srwi r0,r6,2 // get #words in buffer
798 lwz r5,0(r3) // get aligned word with first source byte
799 lis r10,hi16(0xFEFEFEFF) // load magic constants into r10 and r11
800 lis r11,hi16(0x80808080)
801 mtctr r0 // set up word loop count
802 addi r3,r3,4 // advance past the source word
803 ori r10,r10,lo16(0xFEFEFEFF)
804 ori r11,r11,lo16(0x80808080)
805 orc r8,r5,r7 // map bytes preceding first source byte into 0xFF
806 bt-- kkNull,copyinstr5enter // enter loop that just counts
807
808 // Special case 1st word, which has been 0xFF filled on left. Note that we use
809 // "and.", even though we execute both in 32 and 64-bit mode. This is OK.
810
811 slw r5,r5,r9 // left justify payload bytes
812 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
813 andc r7,r11,r8 // r7 = ~data & 0x80808080
814 subfic r0,r2,4 // get r0 <- #payload bytes in 1st word
815 and. r7,r9,r7 // if r7==0, then all bytes in r8 are nonzero
816 stw r5,0(r4) // copy payload bytes to dest buffer
817 add r4,r4,r0 // then point to next byte in dest buffer
818 bdnzt cr0_eq,copyinstr6 // use loop that copies if 0 not found
819
820 b copyinstr7 // 0 found (buffer can't be full)
821
822
823 // Word loop(s). They do a word-parallel search for 0s, using the following
824 // inobvious but very efficient test:
825 // y = data + 0xFEFEFEFF
826 // z = ~data & 0x80808080
827 // If (y & z)==0, then all bytes in dataword are nonzero. There are two copies
828 // of this loop, one that just counts and another that copies.
829 // r3 = ptr to next word of source (word aligned)
830 // r4 = ptr to next byte in buffer
831 // r6 = original buffer length (adjusted to be word origin)
832 // r10 = 0xFEFEFEFF
833 // r11 = 0x80808080
834 // r12 = ptr to 1st source byte (used to determine string length)
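// A minimal C sketch of this test (illustrative only), for one 32-bit word:
//
//      #include <stdint.h>
//      static int word_has_zero_byte(uint32_t data) {
//          uint32_t y = data + 0xFEFEFEFFu;     // same as data - 0x01010101
//          uint32_t z = ~data & 0x80808080u;    // high bit of each byte that was < 0x80
//          return (y & z) != 0;                 // nonzero iff some byte of data is 0x00
//      }
//
// As the copyinstr7 comments below explain, y & z can also light bits for 0x01 bytes that
// precede the first zero byte, so the position of the zero is found only after masking
// those false hits; the yes/no answer above is still exact.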
835
836 .align 5 // align inner loops for speed
837 copyinstr5: // version that counts but does not copy
838 lwz r8,0(r3) // get next word of source
839 addi r3,r3,4 // advance past it
840 copyinstr5enter:
841 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
842 andc r7,r11,r8 // r7 = ~data & 0x80808080
843 and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode)
844 bdnzt cr0_eq,copyinstr5 // if r7==0, then all bytes in r8 are nonzero
845
846 b copyinstr7
847
848 .align 5 // align inner loops for speed
849 copyinstr6: // version that counts and copies
850 lwz r8,0(r3) // get next word of source
851 addi r3,r3,4 // advance past it
852 addi r4,r4,4 // increment dest ptr while we wait for data
853 add r9,r10,r8 // r9 = data + 0xFEFEFEFF
854 andc r7,r11,r8 // r7 = ~data & 0x80808080
855 and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode)
856 stw r8,-4(r4) // pack all 4 bytes into buffer
857 bdnzt cr0_eq,copyinstr6 // if r7==0, then all bytes are nonzero
858
859
860 // Either 0 found or buffer filled. The above algorithm has mapped nonzero bytes to 0
861 // and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
862 // mapped to 0x80. We must mask out these false hits before searching for a 0x80 byte.
863 // r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4))
864 // r6 = original buffer length (adjusted to be word origin)
865 // r7 = computed vector of 0x00 and 0x80 bytes
866 // r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word
867 // r12 = ptr to 1st source byte (used to determine string length)
868 // cr0 = beq set iff 0 not found
869
870 copyinstr7:
871 rlwinm r2,r8,7,0,31 // move 0x01 bits to 0x80 position
872 rlwinm r6,r6,0,0x3 // mask down to partial byte count in last word
873 andc r7,r7,r2 // turn off false hits from 0x0100 worst case
874 crnot kkZero,cr0_eq // 0 found iff cr0_eq is off
875 srwi r7,r7,8 // we want to count the 0 as a byte xferred
876 cmpwi r6,0 // any bytes left over in last word?
877 cntlzw r7,r7 // now we can find the 0 byte (ie, the 0x80)
878 subi r3,r3,4 // back up r3 to point to 1st byte in r8
879 srwi r7,r7,3 // convert 8,16,24,32 to 1,2,3,4
880 add r3,r3,r7 // now r3 points one past 0 byte, or at 1st byte not xferred
881 bt++ kkZero,copyinstr10 // 0 found, so done
882
883 beq copyinstr10 // r6==0, so buffer truly full
884 mtctr r6 // 0 not found, loop over r6 bytes
885 b copyinstr8 // enter byte loop for last 1-3 leftover bytes
886
887
888 // Byte loop. This is used for very small buffers and for the odd bytes left over
889 // after searching and copying words at a time.
890 // r3 = ptr to next byte of source
891 // r4 = ptr to next dest byte
892 // r12 = ptr to first byte of source
893 // ctr = count of bytes to check
894
895 .align 5 // align inner loops for speed
896 copyinstr8: // loop over bytes of source
897 lbz r0,0(r3) // get next byte of source
898 addi r3,r3,1
899 addi r4,r4,1 // increment dest addr whether we store or not
900 cmpwi r0,0 // the 0?
901 bt-- kkNull,copyinstr9 // don't store if copyinstr with NULL ptr
902 stb r0,-1(r4)
903 copyinstr9:
904 bdnzf cr0_eq,copyinstr8 // loop if byte not 0 and more room in buffer
905
906 crmove kkZero,cr0_eq // remember if 0 found or buffer filled
907
908
909 // Buffer filled or 0 found. Unwind and return.
910 // r3 = ptr to 1st source byte not transferred
911 // r12 = ptr to 1st source byte
912 // r31 = mapped ptr returned by MapUserMemoryWindow
913 // cr3 = kkZero set iff 0 found
914
915 copyinstr10:
916 lwz r9,kkCountPtr(r1) // get ptr to place to store count of bytes moved
917 sub r2,r3,r12 // compute #bytes copied (including the 0)
918 li r3,0 // assume success return status
919 stw r2,0(r9) // store #bytes moved
920 bt++ kkZero,copyinx // we did find the 0 so return 0
921 li r3,ENAMETOOLONG // buffer filled
922 b copyinx // join main exit routine
923
924 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
925 /*
926 * int
927 * copypv(source, sink, size, which)
928 * addr64_t src; // r3 and r4
929 * addr64_t dst; // r5 and r6
930 * size_t size; // r7
931 * int which; // r8
932 *
933 * Operand size bytes are copied from operand src into operand dst. The source and
934 * destination operand addresses are given as addr64_t, and may designate starting
935 * locations in physical or virtual memory in any combination except where both are
936 * virtual. Virtual memory locations may be in either the kernel or the current thread's
937 * address space. Operand size may be up to 256MB.
938 *
939 * Operation is controlled by operand which, which offers these options:
940 * cppvPsrc : source operand is (1) physical or (0) virtual
941 * cppvPsnk : destination operand is (1) physical or (0) virtual
942 * cppvKmap : virtual operand is in (1) kernel or (0) current thread
943 * cppvFsnk : (1) flush destination before and after transfer
944 * cppvFsrc : (1) flush source before and after transfer
945 * cppvNoModSnk : (1) don't set destination operand's changed bit(s)
946 * cppvNoRefSrc : (1) don't set source operand's referenced bit(s)
947 *
948 * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32().
949 * This section describes the operation of the new 64-bit path.
950 *
951 * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a
952 * window in the kernel address space into all of physical RAM plus the I/O hole. Since
953 * the window's mappings specify the proper access policies for the underlying memory,
954 * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk
955 * and cppvFsrc are ignored. Physical operand addresses are relocated into the physical
956 * memory window, and are accessed with data relocation on. Virtual addresses are either
957 * within the kernel, or are mapped into the kernel address space through the user memory
958 * window. Because accesses to a virtual operand are performed with data relocation on,
959 * the new path does not have to translate the address, disable/enable interrupts, lock
960 * the mapping, or update referenced and changed bits.
961 *
962 * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is
963 * a substantial performance penalty for copypv operating in real mode. Utilizing the
964 * new 64-bit path, transfer performance increases >100% on the G5.
965 *
966 * The attentive reader may notice that mtmsrd ops are not followed by isync ops as
967 * might be expected. The 970 follows PowerPC architecture version 2.01, which defines
968 * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer
969 * required.
970 *
971 * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need
972 * to call 32-bit functions, which would lead to the high-order 32 bits of our values
973 * getting clobbered unless we do something special. So, we preserve our 64-bit non-volatiles
974 * in our own stack frame across calls to 32-bit functions.
975 *
976 */
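// A hedged usage sketch (hypothetical caller; argument names are for illustration only,
// the prototype and flag names follow the comment above, and the cppvXxx identifiers are
// assumed to be the flag masks for the option bits listed there). It copies len bytes from
// a physical source into a kernel-virtual sink, suppressing the source referenced-bit update:
//
//      extern int copypv(addr64_t source, addr64_t sink, size_t size, int which);
//
//      int rc = copypv(src_paddr,                        // addr64_t physical source
//                      (addr64_t)(uintptr_t)kern_buf,    // kernel-virtual sink
//                      len,                              // operand size, less than 256MB
//                      cppvPsrc | cppvKmap | cppvNoRefSrc);
//      // rc is 0 on success, EINVAL for bad arguments, or EFAULT if a fault occurs
//      // during the copy (see copypv_einval and copypv_error below).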
977
978 // Map operand which bits into non-volatile CR2 and CR3 bits.
979 #define whichAlign ((3+1)*4)
980 #define whichMask 0x007F0000
981 #define pvPsnk (cppvPsnkb - whichAlign)
982 #define pvPsrc (cppvPsrcb - whichAlign)
983 #define pvFsnk (cppvFsnkb - whichAlign)
984 #define pvFsrc (cppvFsrcb - whichAlign)
985 #define pvNoModSnk (cppvNoModSnkb - whichAlign)
986 #define pvNoRefSrc (cppvNoRefSrcb - whichAlign)
987 #define pvKmap (cppvKmapb - whichAlign)
988 #define pvNoCache cr2_lt
989
990 .align 5
991 .globl EXT(copypv)
992
993 LEXT(copypv)
994 mfsprg r10,2 // get feature flags
995 mtcrf 0x02,r10 // we need to test pf64Bit
996 bt++ pf64Bitb,copypv_64 // skip if 64-bit processor (only they take hint)
997
998 b EXT(hw_copypv_32) // carry on with 32-bit copypv
999
1000 // Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber.
1001 copypv_64:
1002 mfsprg r9,1 // get current thread
1003 stwu r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1)
1004 // allocate stack frame and link it
1005 mflr r0 // get return address
1006 mfcr r10 // get cr2 and cr3
1007 lwz r12,THREAD_RECOVER(r9) // get error callback
1008 stw r26,FM_ARG0+0x00(r1) // save non-volatile r26
1009 stw r27,FM_ARG0+0x04(r1) // save non-volatile r27
1010 stw r28,FM_ARG0+0x08(r1) // save non-volatile r28
1011 stw r29,FM_ARG0+0x0C(r1) // save non-volatile r29
1012 stw r30,FM_ARG0+0x10(r1) // save non-volatile r30
1013 stw r31,FM_ARG0+0x14(r1) // save non-volatile r31
1014 stw r12,FM_ARG0+0x20(r1) // save error callback
1015 stw r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
1016 // save return address
1017 stw r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
1018 // save non-volatile cr2 and cr3
1019
1020 // Non-volatile register usage in this routine is:
1021 // r26: saved msr image
1022 // r27: current pmap_t / virtual source address
1023 // r28: destination virtual address
1024 // r29: source address
1025 // r30: destination address
1026 // r31: byte count to copy
1027 // cr2/3: parameter 'which' bits
1028
1029 rlwinm r8,r8,whichAlign,whichMask // align and mask which bits
1030 mr r31,r7 // copy size to somewhere non-volatile
1031 mtcrf 0x20,r8 // insert which bits into cr2 and cr3
1032 mtcrf 0x10,r8 // insert which bits into cr2 and cr3
1033 rlwinm r29,r3,0,1,0 // form source address high-order bits
1034 rlwinm r30,r5,0,1,0 // form destination address high-order bits
1035 rlwimi r29,r4,0,0,31 // form source address low-order bits
1036 rlwimi r30,r6,0,0,31 // form destination address low-order bits
1037 crand cr7_lt,pvPsnk,pvPsrc // are both operand addresses physical?
1038 cntlzw r0,r31 // count leading zeroes in byte count
1039 cror cr7_eq,pvPsnk,pvPsrc // cr7_eq <- source or destination is physical
1040 bf-- cr7_eq,copypv_einval // both operands may not be virtual
1041 cmplwi r0,4 // byte count greater than or equal 256M (2**28)?
1042 blt-- copypv_einval // byte count too big, give EINVAL
1043 cmplwi r31,0 // byte count zero?
1044 beq-- copypv_zero // early out
1045 bt cr7_lt,copypv_phys // both operand addresses are physical
1046 mr r28,r30 // assume destination is virtual
1047 bf pvPsnk,copypv_dv // is destination virtual?
1048 mr r28,r29 // no, so source must be virtual
1049 copypv_dv:
1050 lis r27,ha16(EXT(kernel_pmap)) // get kernel's pmap_t *, high-order
1051 lwz r27,lo16(EXT(kernel_pmap))(r27) // get kernel's pmap_t
1052 bt pvKmap,copypv_kern // virtual address in kernel map?
1053 lwz r3,ACT_VMMAP(r9) // get user's vm_map *
1054 rldicl r4,r28,32,32 // r4, r5 <- addr64_t virtual address
1055 rldicl r5,r28,0,32
1056 std r29,FM_ARG0+0x30(r1) // preserve 64-bit r29 across 32-bit call
1057 std r30,FM_ARG0+0x38(r1) // preserve 64-bit r30 across 32-bit call
1058 bl EXT(MapUserMemoryWindow) // map slice of user space into kernel space
1059 ld r29,FM_ARG0+0x30(r1) // restore 64-bit r29
1060 ld r30,FM_ARG0+0x38(r1) // restore 64-bit r30
1061 rlwinm r28,r3,0,1,0 // convert relocated addr64_t virtual address
1062 rlwimi r28,r4,0,0,31 // into a single 64-bit scalar
1063 copypv_kern:
1064
1065 // Since we'll be accessing the virtual operand with data-relocation on, we won't need to
1066 // update the referenced and changed bits manually after the copy. So, force the appropriate
1067 // flag bit on for the virtual operand.
1068 crorc pvNoModSnk,pvNoModSnk,pvPsnk // for virtual dest, let hardware do ref/chg bits
1069 crorc pvNoRefSrc,pvNoRefSrc,pvPsrc // for virtual source, let hardware do ref bit
1070
1071 // We'll be finding a mapping and looking at it, so we need to disable 'rupts.
1072 lis r0,hi16(MASK(MSR_VEC)) // get vector mask
1073 ori r0,r0,lo16(MASK(MSR_FP)) // insert fp mask
1074 mfmsr r26 // save current msr
1075 andc r26,r26,r0 // turn off VEC and FP in saved copy
1076 ori r0,r0,lo16(MASK(MSR_EE)) // add EE to our mask
1077 andc r0,r26,r0 // disable EE in our new msr image
1078 mtmsrd r0 // introduce new msr image
1079
1080 // We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28. We now
1081 // try to find a mapping corresponding to this address in order to determine whether the address
1082 // is cacheable. If we don't find a mapping, we can safely assume that the operand is cacheable
1083 // (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we
1084 // examine the mapping's caching-inhibited bit.
1085 mr r3,r27 // r3 <- pmap_t pmap
1086 rldicl r4,r28,32,32 // r4, r5 <- addr64_t va
1087 rldicl r5,r28,0,32
1088 la r6,FM_ARG0+0x18(r1) // r6 <- addr64_t *nextva
1089 li r7,1 // r7 <- int full, search nested mappings
1090 std r26,FM_ARG0+0x28(r1) // preserve 64-bit r26 across 32-bit calls
1091 std r28,FM_ARG0+0x30(r1) // preserve 64-bit r28 across 32-bit calls
1092 std r29,FM_ARG0+0x38(r1) // preserve 64-bit r29 across 32-bit calls
1093 std r30,FM_ARG0+0x40(r1) // preserve 64-bit r30 across 32-bit calls
1094 bl EXT(mapping_find) // find mapping for virtual operand
1095 mr. r3,r3 // did we find it?
1096 beq copypv_nomapping // nope, so we'll assume it's cacheable
1097 lwz r4,mpVAddr+4(r3) // get low half of virtual addr for hw flags
1098 rlwinm. r4,r4,0,mpIb-32,mpIb-32 // caching-inhibited bit set?
1099 crnot pvNoCache,cr0_eq // if it is, use bcopy_nc
1100 bl EXT(mapping_drop_busy) // drop busy on the mapping
1101 copypv_nomapping:
1102 ld r26,FM_ARG0+0x28(r1) // restore 64-bit r26
1103 ld r28,FM_ARG0+0x30(r1) // restore 64-bit r28
1104 ld r29,FM_ARG0+0x38(r1) // restore 64-bit r29
1105 ld r30,FM_ARG0+0x40(r1) // restore 64-bit r30
1106 mtmsrd r26 // restore msr to its previous state
1107
1108 // Set both the source and destination virtual addresses to the virtual operand's address --
1109 // we'll overlay one of them with the physical operand's address.
1110 mr r27,r28 // make virtual operand BOTH source AND destination
1111
1112 // Now we're ready to relocate the physical operand address(es) into the physical memory window.
1113 // Recall that we've mapped physical memory (including the I/O hole) into the kernel's address
1114 // space somewhere at or over the 2**32 line. If one or both of the operands are in the I/O hole,
1115 // we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy.
1116 copypv_phys:
1117 ld r6,lgPMWvaddr(0) // get physical memory window virtual address
1118 bf pvPsnk,copypv_dstvirt // is destination address virtual?
1119 cntlzd r4,r30 // count leading zeros in destination address
1120 cmplwi r4,32 // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
1121 cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations
1122 add r28,r30,r6 // relocate physical destination into physical window
1123 copypv_dstvirt:
1124 bf pvPsrc,copypv_srcvirt // is source address virtual?
1125 cntlzd r4,r29 // count leading zeros in source address
1126 cmplwi r4,32 // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
1127 cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations
1128 add r27,r29,r6 // relocate physical source into physical window
1129 copypv_srcvirt:
1130
1131 // Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything
1132 // funny happens during the copy. So, we set a pointer to our error handler in the per-thread
1133 // control block.
1134 mfsprg r8,1 // get current threads stuff
1135 lis r3,hi16(copypv_error) // get our error callback's address, high
1136 ori r3,r3,lo16(copypv_error) // get our error callback's address, low
1137 stw r3,THREAD_RECOVER(r8) // set our error callback
1138
1139 // Since our physical operand(s) are relocated at or above the 2**32 line, we must enter
1140 // 64-bit mode.
1141 li r0,1 // get a handy one bit
1142 mfmsr r3 // get current msr
1143 rldimi r3,r0,63,MSR_SF_BIT // set SF bit on in our msr copy
1144 mtmsrd r3 // enter 64-bit mode
1145
1146 // If requested, flush data cache
1147 // Note that we don't flush, the code is being saved "just in case".
1148 #if 0
1149 bf pvFsrc,copypv_nfs // do we flush the source?
1150 rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address
1151 rldicl r4,r27,0,32
1152 mr r5,r31 // r5 <- count (in bytes)
1153 li r6,0 // r6 <- boolean phys (false, not physical)
1154 bl EXT(flush_dcache) // flush the source operand
1155 copypv_nfs:
1156 bf pvFsnk,copypv_nfdx // do we flush the destination?
1157 rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address
1158 rldicl r4,r28,0,32
1159 mr r5,r31 // r5 <- count (in bytes)
1160 li r6,0 // r6 <- boolean phys (false, not physical)
1161 bl EXT(flush_dcache) // flush the destination operand
1162 copypv_nfdx:
1163 #endif
1164
1165 // Call bcopy or bcopy_nc to perform the copy.
1166 mr r3,r27 // r3 <- source virtual address
1167 mr r4,r28 // r4 <- destination virtual address
1168 mr r5,r31 // r5 <- bytes to copy
1169 bt pvNoCache,copypv_nc // take non-caching route
1170 bl EXT(bcopy) // call bcopy to do the copying
1171 b copypv_copydone
1172 copypv_nc:
1173 bl EXT(bcopy_nc) // call bcopy_nc to do the copying
1174 copypv_copydone:
1175
1176 // If requested, flush data cache
1177 // Note that we don't flush, the code is being saved "just in case".
1178 #if 0
1179 bf pvFsrc,copypv_nfsx // do we flush the source?
1180 rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address
1181 rldicl r4,r27,0,32
1182 mr r5,r31 // r5 <- count (in bytes)
1183 li r6,0 // r6 <- boolean phys (false, not physical)
1184 bl EXT(flush_dcache) // flush the source operand
1185 copypv_nfsx:
1186 bf pvFsnk,copypv_nfd // do we flush the destination?
1187 rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address
1188 rldicl r4,r28,0,32
1189 mr r5,r31 // r5 <- count (in bytes)
1190 li r6,0 // r6 <- boolean phys (false, not physical)
1191 bl EXT(flush_dcache) // flush the destination operand
1192 copypv_nfd:
1193 #endif
1194
1195 // Leave 64-bit mode.
1196 mfmsr r3 // get current msr
1197 rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy
1198 mtmsrd r3 // leave 64-bit mode
1199
1200 // If requested, set ref/chg on source/dest physical operand(s). It is possible that the copy is
1201 // from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling
1202 // mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic.
1203 // Note that this code is page-size sensitive, so it should probably be a part of our low-level
1204 // code in hw_vm.s.
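// For reference, a closed-form C sketch (illustrative only) of counting the 4K pages
// spanned by a byte range, which is the quantity the page count below is meant to track
// (the code builds it from the initial page fragment plus whole 4K chunks):
//
//      static unsigned pages_spanned(uint64_t addr, uint64_t nbytes) {   // nbytes > 0
//          return (unsigned)(((addr + nbytes - 1) >> 12) - (addr >> 12) + 1);
//      }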
1205 bt pvNoModSnk,copypv_nomod // skip destination update if not requested
1206 std r29,FM_ARG0+0x30(r1) // preserve 64-bit r29 across 32-bit calls
1207 li r26,1 // r26 <- 4K-page count
1208 mr r27,r31 // r27 <- byte count
1209 rlwinm r3,r30,0,20,31 // does destination cross a page boundary?
1210 subfic r3,r3,4096 // r3 <- bytes left in destination's first 4K page
1211 cmplw r3,r27 // compare with total byte count
1212 blt copypv_modnox // skip if not crossing case
1213 subf r27,r3,r27 // r27 <- byte count less initial fragment
1214 addi r26,r26,1 // increment page count
1215 copypv_modnox:
1216 srdi r3,r27,12 // pages to update (not including crosser)
1217 add r26,r26,r3 // add in crosser
1218 srdi r27,r30,12 // r27 <- destination page number
1219 copypv_modloop:
1220 mr r3,r27 // r3 <- destination page number
1221 la r4,FM_ARG0+0x18(r1) // r4 <- unsigned int *pindex
1222 bl EXT(mapping_phys_lookup) // see if page is really there
1223 mr. r3,r3 // is it?
1224 beq-- copypv_modend // nope, break out of modify loop
1225 mr r3,r27 // r3 <- destination page number
1226 bl EXT(mapping_set_mod) // set page changed status
1227 subi r26,r26,1 // decrement page count
1228 cmpwi r26,0 // done yet?
1229 bgt copypv_modloop // nope, iterate
1230 copypv_modend:
1231 ld r29,FM_ARG0+0x30(r1) // restore 64-bit r29
1232 copypv_nomod:
1233 bt pvNoRefSrc,copypv_done // skip source update if not requested
1234 copypv_debugref:
1235 li r26,1 // r26 <- 4K-page count
1236 mr r27,r31 // r27 <- byte count
1237 rlwinm r3,r29,0,20,31 // does source cross a page boundary?
1238 subfic r3,r3,4096 // r3 <- bytes left in source's first 4K page
1239 cmplw r3,r27 // compare with total byte count
1240 blt copypv_refnox // skip if not crossing case
1241 subf r27,r3,r27 // r27 <- byte count less initial fragment
1242 addi r26,r26,1 // increment page count
1243 copypv_refnox:
1244 srdi r3,r27,12 // pages to update (not including crosser)
1245 add r26,r26,r3 // add in crosser
1246 srdi r27,r29,12 // r27 <- source page number
1247 copypv_refloop:
1248 mr r3,r27 // r3 <- source page number
1249 la r4,FM_ARG0+0x18(r1) // r4 <- unsigned int *pindex
1250 bl EXT(mapping_phys_lookup) // see if page is really there
1251 mr. r3,r3 // is it?
1252 beq-- copypv_done // nope, break out of reference loop
1253 mr r3,r27 // r3 <- source page number
1254 bl EXT(mapping_set_ref) // set page referenced status
1255 subi r26,r26,1 // decrement page count
1256 cmpwi r26,0 // done yet?
1257 bgt copypv_refloop // nope, iterate
1258
1259 // Return, indicating success.
1260 copypv_done:
1261 copypv_zero:
1262 li r3,0 // our efforts were crowned with success
1263
1264 // Pop frame, restore caller's non-volatiles, clear recovery routine pointer.
1265 copypv_return:
1266 mfsprg r9,1 // get current threads stuff
1267 lwz r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
1268 // get return address
1269 lwz r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
1270 // get non-volatile cr2 and cr3
1271 lwz r26,FM_ARG0+0x00(r1) // restore non-volatile r26
1272 lwz r27,FM_ARG0+0x04(r1) // restore non-volatile r27
1273 mtlr r0 // restore return address
1274 lwz r28,FM_ARG0+0x08(r1) // restore non-volatile r28
1275 mtcrf 0x20,r4 // restore non-volatile cr2
1276 mtcrf 0x10,r4 // restore non-volatile cr3
1277 lwz r11,FM_ARG0+0x20(r1) // get saved error callback
1278 lwz r29,FM_ARG0+0x0C(r1) // restore non-volatile r29
1279 lwz r30,FM_ARG0+0x10(r1) // restore non-volatile r30
1280 lwz r31,FM_ARG0+0x14(r1) // restore non-volatile r31
1281 stw r11,THREAD_RECOVER(r9) // restore our error callback
1282 lwz r1,0(r1) // release stack frame
1283
1284 blr // y'all come back now
1285
1286 // Invalid argument handler.
1287 copypv_einval:
1288 li r3,EINVAL // invalid argument
1289 b copypv_return // return
1290
1291 // Error encountered during bcopy or bcopy_nc.
1292 copypv_error:
1293 mfmsr r3 // get current msr
1294 rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy
1295 mtmsrd r3 // leave 64-bit mode
1296 li r3,EFAULT // it was all his fault
1297 b copypv_return // return